66 #include "llvm/IR/IntrinsicsAArch64.h"
102 using namespace llvm;
105 #define DEBUG_TYPE "aarch64-lower"
107 STATISTIC(NumTailCalls,
"Number of tail calls");
108 STATISTIC(NumShiftInserts,
"Number of vector shift inserts");
109 STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
116 cl::desc(
"Allow AArch64 Local Dynamic TLS code generation"),
121 cl::desc(
"Enable AArch64 logical imm instruction "
131 cl::desc(
"Combine extends of AArch64 masked "
132 "gather intrinsics"),
145 AArch64::X3, AArch64::X4, AArch64::X5,
146 AArch64::X6, AArch64::X7};
148 AArch64::Q3, AArch64::Q4, AArch64::Q5,
149 AArch64::Q6, AArch64::Q7};
181 switch (EC.getKnownMinValue()) {
197 "Expected scalable predicate vector type!");
219 "Expected legal vector type!");
265 switch (
Op.getOpcode()) {
276 switch (
Op.getConstantOperandVal(0)) {
279 case Intrinsic::aarch64_sve_ptrue:
280 case Intrinsic::aarch64_sve_pnext:
281 case Intrinsic::aarch64_sve_cmpeq:
282 case Intrinsic::aarch64_sve_cmpne:
283 case Intrinsic::aarch64_sve_cmpge:
284 case Intrinsic::aarch64_sve_cmpgt:
285 case Intrinsic::aarch64_sve_cmphs:
286 case Intrinsic::aarch64_sve_cmphi:
287 case Intrinsic::aarch64_sve_cmpeq_wide:
288 case Intrinsic::aarch64_sve_cmpne_wide:
289 case Intrinsic::aarch64_sve_cmpge_wide:
290 case Intrinsic::aarch64_sve_cmpgt_wide:
291 case Intrinsic::aarch64_sve_cmplt_wide:
292 case Intrinsic::aarch64_sve_cmple_wide:
293 case Intrinsic::aarch64_sve_cmphs_wide:
294 case Intrinsic::aarch64_sve_cmphi_wide:
295 case Intrinsic::aarch64_sve_cmplo_wide:
296 case Intrinsic::aarch64_sve_cmpls_wide:
297 case Intrinsic::aarch64_sve_fcmpeq:
298 case Intrinsic::aarch64_sve_fcmpne:
299 case Intrinsic::aarch64_sve_fcmpge:
300 case Intrinsic::aarch64_sve_fcmpgt:
301 case Intrinsic::aarch64_sve_fcmpuo:
302 case Intrinsic::aarch64_sve_facgt:
303 case Intrinsic::aarch64_sve_facge:
304 case Intrinsic::aarch64_sve_whilege:
305 case Intrinsic::aarch64_sve_whilegt:
306 case Intrinsic::aarch64_sve_whilehi:
307 case Intrinsic::aarch64_sve_whilehs:
308 case Intrinsic::aarch64_sve_whilele:
309 case Intrinsic::aarch64_sve_whilelo:
310 case Intrinsic::aarch64_sve_whilels:
311 case Intrinsic::aarch64_sve_whilelt:
312 case Intrinsic::aarch64_sve_match:
313 case Intrinsic::aarch64_sve_nmatch:
314 case Intrinsic::aarch64_sve_whilege_x2:
315 case Intrinsic::aarch64_sve_whilegt_x2:
316 case Intrinsic::aarch64_sve_whilehi_x2:
317 case Intrinsic::aarch64_sve_whilehs_x2:
318 case Intrinsic::aarch64_sve_whilele_x2:
319 case Intrinsic::aarch64_sve_whilelo_x2:
320 case Intrinsic::aarch64_sve_whilels_x2:
321 case Intrinsic::aarch64_sve_whilelt_x2:
341 if (Subtarget->hasLS64()) {
347 if (Subtarget->hasFPARMv8()) {
355 if (Subtarget->hasNEON()) {
366 if (Subtarget->hasBF16())
376 if (Subtarget->hasBF16())
401 if (Subtarget->hasBF16()) {
577 if (Subtarget->hasCSSC()) {
656 if (Subtarget->hasFullFP16())
673 if (!Subtarget->hasFullFP16()) {
757 if (Subtarget->hasFullFP16())
766 if (Subtarget->hasFullFP16())
787 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
813 #define LCALLNAMES(A, B, N) \
814 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
815 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
816 setLibcallName(A##N##_REL, #B #N "_rel"); \
817 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
818 #define LCALLNAME4(A, B) \
819 LCALLNAMES(A, B, 1) \
820 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
821 #define LCALLNAME5(A, B) \
822 LCALLNAMES(A, B, 1) \
823 LCALLNAMES(A, B, 2) \
824 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
825 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
826 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
827 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
828 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
829 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
830 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
836 if (Subtarget->hasLSE128()) {
850 if (Subtarget->hasLSE2()) {
1037 if (Subtarget->hasNEON()) {
1077 if (Subtarget->hasFullFP16()) {
1145 if (VT.getVectorElementType() !=
MVT::f16 || Subtarget->hasFullFP16()) {
1198 if (Subtarget->hasFullFP16())
1220 if (Subtarget->hasSME()) {
1234 if (Subtarget->hasSVE()) {
1292 if (Subtarget->hasSVE2()) {
1483 addTypeForStreamingSVE(VT);
1487 addTypeForStreamingSVE(VT);
1495 addTypeForFixedLengthSVE(VT);
1498 addTypeForFixedLengthSVE(VT);
1557 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1569 void AArch64TargetLowering::addTypeForNEON(
MVT VT) {
1624 for (
unsigned Opcode :
1642 for (
unsigned Opcode :
1678 if (Subtarget->hasD128()) {
1687 if (!Subtarget->hasSVE())
1704 void AArch64TargetLowering::addTypeForStreamingSVE(
MVT VT) {
1734 while (InnerVT != VT) {
1747 while (InnerVT != VT) {
1836 void AArch64TargetLowering::addTypeForFixedLengthSVE(
MVT VT) {
1858 while (InnerVT != VT) {
1871 while (InnerVT != VT) {
1966 void AArch64TargetLowering::addDRTypeForNEON(
MVT VT) {
1971 void AArch64TargetLowering::addQRTypeForNEON(
MVT VT) {
1989 Imm =
C->getZExtValue();
2000 return N->getOpcode() == Opc &&
2005 const APInt &Demanded,
2017 unsigned EltSize = Size;
2034 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2036 uint64_t Sum = RotatedImm + NonDemandedBits;
2037 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2038 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
2067 while (EltSize < Size) {
2068 NewImm |= NewImm << EltSize;
2074 "demanded bits should never be altered");
2075 assert(OldImm != NewImm &&
"the new imm shouldn't be equal to the old imm");
2078 EVT VT =
Op.getValueType();
2084 if (NewImm == 0 || NewImm == OrigMask) {
2109 EVT VT =
Op.getValueType();
2114 assert((Size == 32 || Size == 64) &&
2115 "i32 or i64 is expected after legalization.");
2122 switch (
Op.getOpcode()) {
2126 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2129 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2132 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
2147 switch (
Op.getOpcode()) {
2153 if (
SrcOp.getValueSizeInBits() !=
Op.getScalarValueSizeInBits()) {
2154 assert(
SrcOp.getValueSizeInBits() >
Op.getScalarValueSizeInBits() &&
2155 "Expected DUP implicit truncation");
2156 Known = Known.
trunc(
Op.getScalarValueSizeInBits());
2170 ~(
Op->getConstantOperandVal(1) <<
Op->getConstantOperandVal(2));
2213 case Intrinsic::aarch64_ldaxr:
2214 case Intrinsic::aarch64_ldxr: {
2216 EVT VT = cast<MemIntrinsicSDNode>(
Op)->getMemoryVT();
2226 unsigned IntNo = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
2230 case Intrinsic::aarch64_neon_umaxv:
2231 case Intrinsic::aarch64_neon_uminv: {
2236 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
2261 unsigned *Fast)
const {
2262 if (Subtarget->requiresStrictAlign())
2267 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.
getStoreSize() != 16 ||
2286 unsigned *Fast)
const {
2287 if (Subtarget->requiresStrictAlign())
2292 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
2316 #define MAKE_CASE(V) \
2650 Register DestReg =
MI.getOperand(0).getReg();
2651 Register IfTrueReg =
MI.getOperand(1).getReg();
2652 Register IfFalseReg =
MI.getOperand(2).getReg();
2653 unsigned CondCode =
MI.getOperand(3).getImm();
2654 bool NZCVKilled =
MI.getOperand(4).isKill();
2685 MI.eraseFromParent();
2692 BB->getParent()->getFunction().getPersonalityFn())) &&
2693 "SEH does not use catchret!");
2705 MIB.
add(
MI.getOperand(1));
2706 MIB.
add(
MI.getOperand(2));
2707 MIB.
add(
MI.getOperand(3));
2708 MIB.
add(
MI.getOperand(4));
2709 MIB.
add(
MI.getOperand(5));
2711 MI.eraseFromParent();
2722 MIB.
add(
MI.getOperand(0));
2723 MIB.
add(
MI.getOperand(1));
2724 MIB.
add(
MI.getOperand(2));
2725 MIB.
add(
MI.getOperand(1));
2727 MI.eraseFromParent();
2737 unsigned StartIdx = 0;
2741 MIB.
addReg(BaseReg +
MI.getOperand(0).getImm());
2746 for (
unsigned I = StartIdx;
I <
MI.getNumOperands(); ++
I)
2747 MIB.
add(
MI.getOperand(
I));
2749 MI.eraseFromParent();
2758 MIB.
add(
MI.getOperand(0));
2760 unsigned Mask =
MI.getOperand(0).getImm();
2761 for (
unsigned I = 0;
I < 8;
I++) {
2762 if (
Mask & (1 <<
I))
2766 MI.eraseFromParent();
2774 if (SMEOrigInstr != -1) {
2794 switch (
MI.getOpcode()) {
2801 case AArch64::F128CSEL:
2803 case TargetOpcode::STATEPOINT:
2809 MI.addOperand(*
MI.getMF(),
2821 case AArch64::LD1_MXIPXX_H_PSEUDO_B:
2823 case AArch64::LD1_MXIPXX_H_PSEUDO_H:
2825 case AArch64::LD1_MXIPXX_H_PSEUDO_S:
2827 case AArch64::LD1_MXIPXX_H_PSEUDO_D:
2829 case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
2831 case AArch64::LD1_MXIPXX_V_PSEUDO_B:
2833 case AArch64::LD1_MXIPXX_V_PSEUDO_H:
2835 case AArch64::LD1_MXIPXX_V_PSEUDO_S:
2837 case AArch64::LD1_MXIPXX_V_PSEUDO_D:
2839 case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
2841 case AArch64::LDR_ZA_PSEUDO:
2843 case AArch64::ZERO_M_PSEUDO:
2869 N =
N->getOperand(0).getNode();
2877 auto Opnd0 =
N->getOperand(0);
3037 bool IsLegal = (
C >> 12 == 0) || ((
C & 0xFFFULL) == 0 &&
C >> 24 == 0);
3039 <<
" legal: " << (IsLegal ?
"yes\n" :
"no\n"));
3061 EVT VT =
LHS.getValueType();
3071 Chain =
RHS.getValue(1);
3081 EVT VT =
LHS.getValueType();
3123 return LHS.getValue(1);
3189 unsigned Opcode = 0;
3192 if (
LHS.getValueType().isFloatingPoint()) {
3194 if (
LHS.getValueType() ==
MVT::f16 && !FullFP16) {
3232 bool &MustBeFirst,
bool WillNegate,
3233 unsigned Depth = 0) {
3241 MustBeFirst =
false;
3248 bool IsOR = Opcode ==
ISD::OR;
3260 if (MustBeFirstL && MustBeFirstR)
3266 if (!CanNegateL && !CanNegateR)
3270 CanNegate = WillNegate && CanNegateL && CanNegateR;
3273 MustBeFirst = !CanNegate;
3278 MustBeFirst = MustBeFirstL || MustBeFirstR;
3310 assert(
LHS.getValueType().isFloatingPoint());
3336 bool IsOR = Opcode ==
ISD::OR;
3342 assert(ValidL &&
"Valid conjunction/disjunction tree");
3349 assert(ValidR &&
"Valid conjunction/disjunction tree");
3354 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
3363 bool NegateAfterAll;
3367 assert(CanNegateR &&
"at least one side must be negatable");
3368 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
3372 NegateAfterR =
true;
3375 NegateR = CanNegateR;
3376 NegateAfterR = !CanNegateR;
3379 NegateAfterAll = !Negate;
3381 assert(Opcode ==
ISD::AND &&
"Valid conjunction/disjunction tree");
3382 assert(!Negate &&
"Valid conjunction/disjunction tree");
3386 NegateAfterR =
false;
3387 NegateAfterAll =
false;
3407 bool DummyCanNegate;
3408 bool DummyMustBeFirst;
3420 auto isSupportedExtend = [&](
SDValue V) {
3425 if (
ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
3427 return (
Mask == 0xFF ||
Mask == 0xFFFF ||
Mask == 0xFFFFFFFF);
3433 if (!
Op.hasOneUse())
3436 if (isSupportedExtend(
Op))
3439 unsigned Opc =
Op.getOpcode();
3441 if (
ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(
Op.getOperand(1))) {
3443 if (isSupportedExtend(
Op.getOperand(0)))
3444 return (
Shift <= 4) ? 2 : 1;
3445 EVT VT =
Op.getValueType();
3457 EVT VT =
RHS.getValueType();
3466 if ((VT ==
MVT::i32 &&
C != 0x80000000 &&
3468 (VT ==
MVT::i64 &&
C != 0x80000000ULL &&
3487 if ((VT ==
MVT::i32 &&
C != INT32_MAX &&
3498 if ((VT ==
MVT::i32 &&
C != UINT32_MAX &&
3521 if (!isa<ConstantSDNode>(
RHS) ||
3554 cast<LoadSDNode>(
LHS)->getMemoryVT() ==
MVT::i16 &&
3555 LHS.getNode()->hasNUsesOfValue(1, 0)) {
3556 int16_t ValueofRHS = cast<ConstantSDNode>(
RHS)->getZExtValue();
3562 RHS.getValueType()),
3584 static std::pair<SDValue, SDValue>
3587 "Unsupported value type");
3593 switch (
Op.getOpcode()) {
3669 Overflow =
Value.getValue(1);
3671 return std::make_pair(
Value, Overflow);
3677 return LowerToScalableOp(
Op, DAG);
3731 if (!CFVal || !CTVal)
3768 return Cmp.getValue(1);
3798 EVT VT0 =
Op.getValue(0).getValueType();
3799 EVT VT1 =
Op.getValue(1).getValueType();
3855 unsigned IsWrite = cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue();
3856 unsigned Locality = cast<ConstantSDNode>(
Op.getOperand(3))->getZExtValue();
3857 unsigned IsData = cast<ConstantSDNode>(
Op.getOperand(4))->getZExtValue();
3859 bool IsStream = !Locality;
3863 assert(Locality <= 3 &&
"Prefetch locality out-of-range");
3867 Locality = 3 - Locality;
3871 unsigned PrfOp = (IsWrite << 4) |
3882 EVT VT =
Op.getValueType();
3888 return LowerFixedLengthFPExtendToSVE(
Op, DAG);
3896 if (
Op.getValueType().isScalableVector())
3899 bool IsStrict =
Op->isStrictFPOpcode();
3900 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
3905 return LowerFixedLengthFPRoundToSVE(
Op, DAG);
3924 bool IsStrict =
Op->isStrictFPOpcode();
3925 EVT InVT =
Op.getOperand(IsStrict ? 1 : 0).getValueType();
3926 EVT VT =
Op.getValueType();
3932 return LowerToPredicatedOp(
Op, DAG, Opcode);
3939 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
3945 !Subtarget->hasFullFP16()) {
3950 {
Op.getOperand(0),
Op.getOperand(1)});
3951 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
3952 {Ext.getValue(1), Ext.getValue(0)});
3955 Op.getOpcode(), dl,
Op.getValueType(),
3961 if (VTSize < InVTSize) {
3966 {Op.getOperand(0), Op.getOperand(1)});
3976 if (VTSize > InVTSize) {
3983 {
Op.getOperand(0),
Op.getOperand(1)});
3984 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
3985 {Ext.getValue(1), Ext.getValue(0)});
4000 return DAG.
getNode(
Op.getOpcode(), dl, {ScalarVT, MVT::Other},
4001 {Op.getOperand(0), Extract});
4002 return DAG.
getNode(
Op.getOpcode(), dl, ScalarVT, Extract);
4011 bool IsStrict =
Op->isStrictFPOpcode();
4012 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4015 return LowerVectorFP_TO_INT(
Op, DAG);
4023 {
Op.getOperand(0), SrcVal});
4024 return DAG.
getNode(
Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
4025 {Ext.getValue(1), Ext.getValue(0)});
4028 Op.getOpcode(), dl,
Op.getValueType(),
4041 AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(
SDValue Op,
4047 EVT DstVT =
Op.getValueType();
4048 EVT SatVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
4053 assert(SatWidth <= DstElementWidth &&
4054 "Saturation width cannot exceed result width");
4066 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
4071 SrcElementWidth = 32;
4078 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
4079 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
4086 if (SrcElementWidth < SatWidth || SrcElementVT ==
MVT::f64)
4117 return LowerVectorFP_TO_INT_SAT(
Op, DAG);
4119 EVT DstVT =
Op.getValueType();
4120 EVT SatVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
4123 assert(SatWidth <= DstWidth &&
"Saturation width cannot exceed result width");
4126 if (SrcVT ==
MVT::f16 && !Subtarget->hasFullFP16()) {
4135 (SrcVT ==
MVT::f16 && Subtarget->hasFullFP16())) &&
4137 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
4143 if (DstWidth < SatWidth)
4170 bool IsStrict =
Op->isStrictFPOpcode();
4171 EVT VT =
Op.getValueType();
4174 EVT InVT =
In.getValueType();
4175 unsigned Opc =
Op.getOpcode();
4189 return LowerToPredicatedOp(
Op, DAG, Opcode);
4196 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
4200 if (VTSize < InVTSize) {
4206 {
Op.getOperand(0),
In});
4216 if (VTSize > InVTSize) {
4233 return DAG.
getNode(
Op.getOpcode(), dl, {ScalarVT, MVT::Other},
4234 {Op.getOperand(0), Extract});
4235 return DAG.
getNode(
Op.getOpcode(), dl, ScalarVT, Extract);
4243 if (
Op.getValueType().isVector())
4244 return LowerVectorINT_TO_FP(
Op, DAG);
4246 bool IsStrict =
Op->isStrictFPOpcode();
4247 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4250 if (
Op.getValueType() ==
MVT::f16 && !Subtarget->hasFullFP16()) {
4254 {Op.getOperand(0), SrcVal});
4282 EVT ArgVT =
Arg.getValueType();
4290 Entry.IsSExt =
false;
4291 Entry.IsZExt =
false;
4292 Args.push_back(Entry);
4295 : RTLIB::SINCOS_STRET_F32;
4306 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
4307 return CallResult.first;
4314 EVT OpVT =
Op.getValueType();
4315 EVT ArgVT =
Op.getOperand(0).getValueType();
4318 return LowerFixedLengthBitcastToSVE(
Op, DAG);
4331 "Expected int->fp bitcast!");
4335 return getSVESafeBitCast(OpVT, ExtResult, DAG);
4337 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
4365 switch (OrigSimpleTy) {
4378 unsigned ExtOpcode) {
4394 static std::optional<uint64_t>
4398 return std::nullopt;
4403 return std::nullopt;
4405 return C->getZExtValue();
4410 EVT VT =
N->getValueType(0);
4415 for (
const SDValue &Elt :
N->op_values()) {
4418 unsigned HalfSize = EltSize / 2;
4420 if (!
isIntN(HalfSize,
C->getSExtValue()))
4423 if (!
isUIntN(HalfSize,
C->getZExtValue()))
4438 N->getOperand(0)->getValueType(0),
4443 EVT VT =
N->getValueType(0);
4449 for (
unsigned i = 0;
i != NumElts; ++
i) {
4451 const APInt &CInt =
C->getAPIntValue();
4472 unsigned Opcode =
N->getOpcode();
4474 SDNode *N0 =
N->getOperand(0).getNode();
4475 SDNode *N1 =
N->getOperand(1).getNode();
4483 unsigned Opcode =
N->getOpcode();
4485 SDNode *N0 =
N->getOperand(0).getNode();
4486 SDNode *N1 =
N->getOperand(1).getNode();
4564 if (IsN0SExt && IsN1SExt)
4570 if (IsN0ZExt && IsN1ZExt)
4574 if (((IsN0SExt && IsN1ZExt) || (IsN0ZExt && IsN1SExt)) &&
4594 if (IsN0ZExt || IsN1ZExt) {
4615 SDValue(IsN0ZExt ? N1 : N0, 0));
4625 if (!IsN1SExt && !IsN1ZExt)
4647 EVT VT =
Op.getValueType();
4659 "unexpected type for custom-lowering ISD::MUL");
4660 SDNode *N0 =
Op.getOperand(0).getNode();
4661 SDNode *N1 =
Op.getOperand(1).getNode();
4682 "unexpected types for extended operands to VMULL");
4683 return DAG.
getNode(NewOpc,
DL, VT, Op0, Op1);
4707 bool IsLess,
bool IsEqual) {
4708 if (!isa<ConstantSDNode>(
Op.getOperand(1)) ||
4709 !isa<ConstantSDNode>(
Op.getOperand(2)))
4713 APInt X =
Op.getConstantOperandAPInt(1);
4714 APInt Y =
Op.getConstantOperandAPInt(2);
4715 APInt NumActiveElems;
4718 NumActiveElems = IsSigned ?
Y.ssub_ov(
X, Overflow) :
Y.usub_ov(
X, Overflow);
4720 NumActiveElems = IsSigned ?
X.ssub_ov(
Y, Overflow) :
X.usub_ov(
Y, Overflow);
4727 NumActiveElems = IsSigned ? NumActiveElems.
sadd_ov(One, Overflow)
4728 : NumActiveElems.
uadd_ov(One, Overflow);
4733 std::optional<unsigned> PredPattern =
4735 unsigned MinSVEVectorSize =
std::max(
4737 unsigned ElementSize = 128 /
Op.getValueType().getVectorMinNumElements();
4738 if (PredPattern != std::nullopt &&
4739 NumActiveElems.
getZExtValue() <= (MinSVEVectorSize / ElementSize))
4740 return getPTrue(DAG, dl,
Op.getValueType(), *PredPattern);
4749 EVT InVT =
Op.getValueType();
4753 "Expected a predicate-to-predicate bitcast");
4757 "Only expect to cast between legal scalable predicate types!");
4787 if (
Attrs.hasStreamingInterfaceOrBody())
4790 if (
Attrs.hasNonStreamingInterfaceAndBody())
4793 assert(
Attrs.hasStreamingCompatibleInterface() &&
"Unexpected interface");
4801 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
4804 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
4811 if (
auto *ES = dyn_cast<ExternalSymbolSDNode>(V)) {
4813 if (
S ==
"__arm_sme_state" ||
S ==
"__arm_tpidr2_save")
4815 if (
S ==
"__arm_tpidr2_restore")
4818 return std::nullopt;
4823 unsigned IntNo =
Op.getConstantOperandVal(1);
4828 case Intrinsic::aarch64_prefetch: {
4832 unsigned IsWrite = cast<ConstantSDNode>(
Op.getOperand(3))->getZExtValue();
4833 unsigned Locality = cast<ConstantSDNode>(
Op.getOperand(4))->getZExtValue();
4834 unsigned IsStream = cast<ConstantSDNode>(
Op.getOperand(5))->getZExtValue();
4835 unsigned IsData = cast<ConstantSDNode>(
Op.getOperand(6))->getZExtValue();
4836 unsigned PrfOp = (IsWrite << 4) |
4844 case Intrinsic::aarch64_sme_za_enable:
4850 case Intrinsic::aarch64_sme_za_disable:
4861 unsigned IntNo =
Op.getConstantOperandVal(1);
4866 case Intrinsic::aarch64_mops_memset_tag: {
4867 auto Node = cast<MemIntrinsicSDNode>(
Op.getNode());
4868 SDValue Chain = Node->getChain();
4873 auto Alignment = Node->getMemOperand()->getAlign();
4874 bool IsVol = Node->isVolatile();
4875 auto DstPtrInfo = Node->getPointerInfo();
4894 unsigned IntNo = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
4898 case Intrinsic::thread_pointer: {
4902 case Intrinsic::aarch64_neon_abs: {
4903 EVT Ty =
Op.getValueType();
4915 case Intrinsic::aarch64_neon_pmull64: {
4919 std::optional<uint64_t> LHSLane =
4921 std::optional<uint64_t> RHSLane =
4924 assert((!LHSLane || *LHSLane < 2) &&
"Expect lane to be None or 0 or 1");
4925 assert((!RHSLane || *RHSLane < 2) &&
"Expect lane to be None or 0 or 1");
4931 auto TryVectorizeOperand = [](
SDValue N, std::optional<uint64_t> NLane,
4932 std::optional<uint64_t> OtherLane,
4938 if (NLane && *NLane == 1)
4943 if (OtherLane && *OtherLane == 1) {
4950 if (NLane && *NLane == 0)
4964 "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
4968 LHS = TryVectorizeOperand(
LHS, LHSLane, RHSLane, dl, DAG);
4969 RHS = TryVectorizeOperand(
RHS, RHSLane, LHSLane, dl, DAG);
4973 case Intrinsic::aarch64_neon_smax:
4975 Op.getOperand(1),
Op.getOperand(2));
4976 case Intrinsic::aarch64_neon_umax:
4978 Op.getOperand(1),
Op.getOperand(2));
4979 case Intrinsic::aarch64_neon_smin:
4981 Op.getOperand(1),
Op.getOperand(2));
4982 case Intrinsic::aarch64_neon_umin:
4984 Op.getOperand(1),
Op.getOperand(2));
4985 case Intrinsic::aarch64_neon_scalar_sqxtn:
4986 case Intrinsic::aarch64_neon_scalar_sqxtun:
4987 case Intrinsic::aarch64_neon_scalar_uqxtn: {
4994 Op.getOperand(1))));
4997 case Intrinsic::aarch64_sve_whilelo:
5000 case Intrinsic::aarch64_sve_whilelt:
5003 case Intrinsic::aarch64_sve_whilels:
5006 case Intrinsic::aarch64_sve_whilele:
5009 case Intrinsic::aarch64_sve_whilege:
5012 case Intrinsic::aarch64_sve_whilegt:
5015 case Intrinsic::aarch64_sve_whilehs:
5018 case Intrinsic::aarch64_sve_whilehi:
5021 case Intrinsic::aarch64_sve_sunpkhi:
5024 case Intrinsic::aarch64_sve_sunpklo:
5027 case Intrinsic::aarch64_sve_uunpkhi:
5030 case Intrinsic::aarch64_sve_uunpklo:
5033 case Intrinsic::aarch64_sve_clasta_n:
5035 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
5036 case Intrinsic::aarch64_sve_clastb_n:
5038 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
5039 case Intrinsic::aarch64_sve_lasta:
5041 Op.getOperand(1),
Op.getOperand(2));
5042 case Intrinsic::aarch64_sve_lastb:
5044 Op.getOperand(1),
Op.getOperand(2));
5045 case Intrinsic::aarch64_sve_rev:
5048 case Intrinsic::aarch64_sve_tbl:
5050 Op.getOperand(1),
Op.getOperand(2));
5051 case Intrinsic::aarch64_sve_trn1:
5053 Op.getOperand(1),
Op.getOperand(2));
5054 case Intrinsic::aarch64_sve_trn2:
5056 Op.getOperand(1),
Op.getOperand(2));
5057 case Intrinsic::aarch64_sve_uzp1:
5059 Op.getOperand(1),
Op.getOperand(2));
5060 case Intrinsic::aarch64_sve_uzp2:
5062 Op.getOperand(1),
Op.getOperand(2));
5063 case Intrinsic::aarch64_sve_zip1:
5065 Op.getOperand(1),
Op.getOperand(2));
5066 case Intrinsic::aarch64_sve_zip2:
5068 Op.getOperand(1),
Op.getOperand(2));
5069 case Intrinsic::aarch64_sve_splice:
5071 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
5072 case Intrinsic::aarch64_sve_ptrue:
5074 cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue());
5075 case Intrinsic::aarch64_sve_clz:
5077 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5078 case Intrinsic::aarch64_sme_cntsb:
5081 case Intrinsic::aarch64_sme_cntsh: {
5086 case Intrinsic::aarch64_sme_cntsw: {
5092 case Intrinsic::aarch64_sme_cntsd: {
5098 case Intrinsic::aarch64_sve_cnt: {
5101 if (
Data.getValueType().isFloatingPoint())
5104 Op.getOperand(2),
Data,
Op.getOperand(1));
5106 case Intrinsic::aarch64_sve_dupq_lane:
5107 return LowerDUPQLane(
Op, DAG);
5108 case Intrinsic::aarch64_sve_convert_from_svbool:
5110 case Intrinsic::aarch64_sve_convert_to_svbool:
5112 case Intrinsic::aarch64_sve_fneg:
5114 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5115 case Intrinsic::aarch64_sve_frintp:
5117 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5118 case Intrinsic::aarch64_sve_frintm:
5120 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5121 case Intrinsic::aarch64_sve_frinti:
5123 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5124 case Intrinsic::aarch64_sve_frintx:
5126 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5127 case Intrinsic::aarch64_sve_frinta:
5129 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5130 case Intrinsic::aarch64_sve_frintn:
5132 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5133 case Intrinsic::aarch64_sve_frintz:
5135 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5136 case Intrinsic::aarch64_sve_ucvtf:
5138 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5140 case Intrinsic::aarch64_sve_scvtf:
5142 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5144 case Intrinsic::aarch64_sve_fcvtzu:
5146 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5148 case Intrinsic::aarch64_sve_fcvtzs:
5150 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5152 case Intrinsic::aarch64_sve_fsqrt:
5154 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5155 case Intrinsic::aarch64_sve_frecpx:
5157 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5158 case Intrinsic::aarch64_sve_frecpe_x:
5161 case Intrinsic::aarch64_sve_frecps_x:
5163 Op.getOperand(1),
Op.getOperand(2));
5164 case Intrinsic::aarch64_sve_frsqrte_x:
5167 case Intrinsic::aarch64_sve_frsqrts_x:
5169 Op.getOperand(1),
Op.getOperand(2));
5170 case Intrinsic::aarch64_sve_fabs:
5172 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5173 case Intrinsic::aarch64_sve_abs:
5175 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5176 case Intrinsic::aarch64_sve_neg:
5178 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5179 case Intrinsic::aarch64_sve_insr: {
5186 Op.getOperand(1), Scalar);
5188 case Intrinsic::aarch64_sve_rbit:
5190 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5192 case Intrinsic::aarch64_sve_revb:
5194 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5195 case Intrinsic::aarch64_sve_revh:
5197 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5198 case Intrinsic::aarch64_sve_revw:
5200 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5201 case Intrinsic::aarch64_sve_revd:
5203 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5204 case Intrinsic::aarch64_sve_sxtb:
5207 Op.getOperand(2),
Op.getOperand(3),
5210 case Intrinsic::aarch64_sve_sxth:
5213 Op.getOperand(2),
Op.getOperand(3),
5216 case Intrinsic::aarch64_sve_sxtw:
5219 Op.getOperand(2),
Op.getOperand(3),
5222 case Intrinsic::aarch64_sve_uxtb:
5225 Op.getOperand(2),
Op.getOperand(3),
5228 case Intrinsic::aarch64_sve_uxth:
5231 Op.getOperand(2),
Op.getOperand(3),
5234 case Intrinsic::aarch64_sve_uxtw:
5237 Op.getOperand(2),
Op.getOperand(3),
5240 case Intrinsic::localaddress: {
5243 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
5245 Op.getSimpleValueType());
5248 case Intrinsic::eh_recoverfp: {
5253 SDValue IncomingFPOp =
Op.getOperand(2);
5255 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->
getGlobal() :
nullptr);
5258 "llvm.eh.recoverfp must take a function as the first argument");
5259 return IncomingFPOp;
5262 case Intrinsic::aarch64_neon_vsri:
5263 case Intrinsic::aarch64_neon_vsli: {
5264 EVT Ty =
Op.getValueType();
5271 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
5273 return DAG.
getNode(Opcode, dl, Ty,
Op.getOperand(1),
Op.getOperand(2),
5277 case Intrinsic::aarch64_neon_srhadd:
5278 case Intrinsic::aarch64_neon_urhadd:
5279 case Intrinsic::aarch64_neon_shadd:
5280 case Intrinsic::aarch64_neon_uhadd: {
5281 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
5282 IntNo == Intrinsic::aarch64_neon_shadd);
5283 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
5284 IntNo == Intrinsic::aarch64_neon_urhadd);
5285 unsigned Opcode = IsSignedAdd
5288 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
5291 case Intrinsic::aarch64_neon_saddlp:
5292 case Intrinsic::aarch64_neon_uaddlp: {
5293 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
5296 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1));
5298 case Intrinsic::aarch64_neon_sdot:
5299 case Intrinsic::aarch64_neon_udot:
5300 case Intrinsic::aarch64_sve_sdot:
5301 case Intrinsic::aarch64_sve_udot: {
5302 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
5303 IntNo == Intrinsic::aarch64_sve_udot)
5306 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
5307 Op.getOperand(2),
Op.getOperand(3));
5309 case Intrinsic::get_active_lane_mask: {
5313 Op.getOperand(1),
Op.getOperand(2));
5318 bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
5327 bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
EVT IndexVT,
5342 bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
5350 std::map<std::tuple<bool, bool, bool>,
unsigned> AddrModes = {
5351 {std::make_tuple(
false,
false,
false),
5353 {std::make_tuple(
false,
false,
true),
5355 {std::make_tuple(
false,
true,
false),
5357 {std::make_tuple(
false,
true,
true),
5359 {std::make_tuple(
true,
false,
false),
5361 {std::make_tuple(
true,
false,
true),
5363 {std::make_tuple(
true,
true,
false),
5365 {std::make_tuple(
true,
true,
true),
5368 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
5369 return AddrModes.find(
Key)->second;
5405 EVT VT =
Op.getValueType();
5429 EVT IndexVT =
Index.getValueType();
5442 "Cannot lower when not using SVE for fixed vectors!");
5452 Mask.getValueType().getVectorElementType() ==
MVT::i64)
5517 EVT IndexVT =
Index.getValueType();
5530 "Cannot lower when not using SVE for fixed vectors!");
5543 Mask.getValueType().getVectorElementType() ==
MVT::i64)
5553 if (PromotedVT != VT)
5577 assert(LoadNode &&
"Expected custom lowering of a masked load node");
5578 EVT VT =
Op->getValueType(0);
5583 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
5631 ST->getBasePtr(),
ST->getMemOperand());
5641 assert (StoreNode &&
"Can only custom lower store nodes");
5652 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
5688 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
5693 return LowerStore128(
Op, DAG);
5699 EVT PtrVT =
Base.getValueType();
5700 for (
unsigned i = 0;
i < 8;
i++) {
5721 bool IsStoreRelease =
5724 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
5725 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
5741 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
5750 assert(LoadNode &&
"Expected custom lowering of a load node");
5756 EVT PtrVT =
Base.getValueType();
5757 for (
unsigned i = 0;
i < 8;
i++) {
5763 Ops.push_back(Part);
5771 EVT VT =
Op->getValueType(0);
5801 MVT VT =
Op.getSimpleValueType();
5839 switch (
Op.getOpcode()) {
5844 return LowerBITCAST(
Op, DAG);
5846 return LowerGlobalAddress(
Op, DAG);
5848 return LowerGlobalTLSAddress(
Op, DAG);
5852 return LowerSETCC(
Op, DAG);
5854 return LowerSETCCCARRY(
Op, DAG);
5858 return LowerBR_CC(
Op, DAG);
5860 return LowerSELECT(
Op, DAG);
5862 return LowerSELECT_CC(
Op, DAG);
5864 return LowerJumpTable(
Op, DAG);
5866 return LowerBR_JT(
Op, DAG);
5868 return LowerConstantPool(
Op, DAG);
5870 return LowerBlockAddress(
Op, DAG);
5872 return LowerVASTART(
Op, DAG);
5874 return LowerVACOPY(
Op, DAG);
5876 return LowerVAARG(
Op, DAG);
5924 return LowerFP_ROUND(
Op, DAG);
5926 return LowerFP_EXTEND(
Op, DAG);
5928 return LowerFRAMEADDR(
Op, DAG);
5930 return LowerSPONENTRY(
Op, DAG);
5932 return LowerRETURNADDR(
Op, DAG);
5934 return LowerADDROFRETURNADDR(
Op, DAG);
5936 return LowerCONCAT_VECTORS(
Op, DAG);
5938 return LowerINSERT_VECTOR_ELT(
Op, DAG);
5940 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
5942 return LowerBUILD_VECTOR(
Op, DAG);
5944 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
5946 return LowerVECTOR_SHUFFLE(
Op, DAG);
5948 return LowerSPLAT_VECTOR(
Op, DAG);
5950 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
5952 return LowerINSERT_SUBVECTOR(
Op, DAG);
5955 return LowerDIV(
Op, DAG);
5960 return LowerMinMax(
Op, DAG);
5964 return LowerVectorSRA_SRL_SHL(
Op, DAG);
5968 return LowerShiftParts(
Op, DAG);
5971 return LowerCTPOP_PARITY(
Op, DAG);
5973 return LowerFCOPYSIGN(
Op, DAG);
5975 return LowerVectorOR(
Op, DAG);
5977 return LowerXOR(
Op, DAG);
5984 return LowerINT_TO_FP(
Op, DAG);
5989 return LowerFP_TO_INT(
Op, DAG);
5992 return LowerFP_TO_INT_SAT(
Op, DAG);
5994 return LowerFSINCOS(
Op, DAG);
5996 return LowerGET_ROUNDING(
Op, DAG);
5998 return LowerSET_ROUNDING(
Op, DAG);
6000 return LowerMUL(
Op, DAG);
6006 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
6008 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
6010 return LowerINTRINSIC_VOID(
Op, DAG);
6012 if (cast<MemSDNode>(
Op)->getMemoryVT() ==
MVT::i128) {
6013 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
6014 return LowerStore128(
Op, DAG);
6018 return LowerSTORE(
Op, DAG);
6020 return LowerFixedLengthVectorMStoreToSVE(
Op, DAG);
6022 return LowerMGATHER(
Op, DAG);
6024 return LowerMSCATTER(
Op, DAG);
6026 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
6038 return LowerVECREDUCE(
Op, DAG);
6040 return LowerATOMIC_LOAD_SUB(
Op, DAG);
6042 return LowerATOMIC_LOAD_AND(
Op, DAG);
6044 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
6046 return LowerVSCALE(
Op, DAG);
6050 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
6053 EVT ExtraVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
6059 return LowerToPredicatedOp(
Op, DAG,
6063 return LowerTRUNCATE(
Op, DAG);
6065 return LowerMLOAD(
Op, DAG);
6069 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
6070 return LowerLOAD(
Op, DAG);
6074 return LowerToScalableOp(
Op, DAG);
6084 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
6086 return LowerABS(
Op, DAG);
6100 return LowerBitreverse(
Op, DAG);
6106 return LowerCTTZ(
Op, DAG);
6108 return LowerVECTOR_SPLICE(
Op, DAG);
6114 "Expected custom lowering of rounding operations only for f16");
6117 {
Op.getOperand(0),
Op.getOperand(1)});
6118 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
6119 {Ext.getValue(1), Ext.getValue(0)});
6123 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
6137 SysRegName, PairLo, PairHi);
6153 EVT VT,
bool OverrideNEON)
const {
6176 return Subtarget->hasSVE();
6203 unsigned Opcode =
N->getOpcode();
6208 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
6209 if (IID < Intrinsic::num_intrinsics)
6223 if (IID == Intrinsic::aarch64_neon_umull ||
6225 IID == Intrinsic::aarch64_neon_smull ||
6234 bool IsVarArg)
const {
6285 AArch64TargetLowering::allocateLazySaveBuffer(
SDValue &Chain,
const SDLoc &
DL,
6313 SDValue AArch64TargetLowering::LowerFormalArguments(
6340 unsigned NumArgs =
Ins.size();
6342 unsigned CurArgIdx = 0;
6343 for (
unsigned i = 0;
i != NumArgs; ++
i) {
6345 if (
Ins[
i].isOrigArg()) {
6346 std::advance(CurOrigArg,
Ins[
i].getOrigArgIndex() - CurArgIdx);
6347 CurArgIdx =
Ins[
i].getOrigArgIndex();
6359 bool UseVarArgCC =
false;
6361 UseVarArgCC = isVarArg;
6365 assert(!Res &&
"Call operand has unhandled type");
6370 bool IsLocallyStreaming =
6371 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
6376 unsigned ExtraArgLocs = 0;
6377 for (
unsigned i = 0,
e =
Ins.size();
i !=
e; ++
i) {
6384 int Size =
Ins[
i].Flags.getByValSize();
6385 unsigned NumRegs = (
Size + 7) / 8;
6392 InVals.push_back(FrameIdxN);
6407 RC = &AArch64::GPR32RegClass;
6409 RC = &AArch64::GPR64RegClass;
6411 RC = &AArch64::FPR16RegClass;
6413 RC = &AArch64::FPR32RegClass;
6415 RC = &AArch64::FPR64RegClass;
6417 RC = &AArch64::FPR128RegClass;
6421 RC = &AArch64::PPRRegClass;
6424 RC = &AArch64::ZPRRegClass;
6431 if (IsLocallyStreaming) {
6458 "Indirect arguments should be scalable on most subtargets");
6482 !
Ins[
i].Flags.isInConsecutiveRegs())
6483 BEAlign = 8 - ArgSize;
6490 unsigned ObjOffset = ArgOffset + BEAlign;
6518 "Indirect arguments should be scalable on most subtargets");
6539 "Indirect arguments should be scalable on most subtargets");
6542 unsigned NumParts = 1;
6543 if (
Ins[
i].
Flags.isInConsecutiveRegs()) {
6545 while (!
Ins[
i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
6554 while (NumParts > 0) {
6556 InVals.push_back(ArgValue);
6562 DL,
Ptr.getValueType(),
6563 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
6566 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
6567 Ptr.getValueType());
6570 Flags.setNoUnsignedWrap(
true);
6572 BytesIncrement, Flags);
6584 if (
Ins[
i].isOrigArg()) {
6594 InVals.push_back(ArgValue);
6597 assert((ArgLocs.size() + ExtraArgLocs) ==
Ins.size());
6601 if (IsLocallyStreaming) {
6606 DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32),
6607 DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64),
6608 DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()), Glue});
6611 for (
unsigned I=0;
I<InVals.size(); ++
I) {
6628 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
6632 unsigned StackOffset = CCInfo.getNextStackOffset();
6645 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
6649 if (!CCInfo.isAllocated(AArch64::X8)) {
6660 for (
unsigned I = 0,
E =
Ins.size();
I !=
E; ++
I) {
6676 unsigned StackArgSize = CCInfo.getNextStackOffset();
6678 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
6682 StackArgSize =
alignTo(StackArgSize, 16);
6701 unsigned TPIDR2Obj = allocateLazySaveBuffer(Chain,
DL, DAG);
6708 void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
6729 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
6731 if (GPRSaveSize != 0) {
6734 if (GPRSaveSize & 15)
6754 for (
unsigned i = FirstVariadicGPR;
i < NumGPRArgRegs; ++
i) {
6760 MF,
GPRIdx, (
i - FirstVariadicGPR) * 8)
6762 MemOps.push_back(
Store);
6770 if (Subtarget->hasFPARMv8() && !IsWin64) {
6772 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
6775 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
6777 if (FPRSaveSize != 0) {
6782 for (
unsigned i = FirstVariadicFPR;
i < NumFPRArgRegs; ++
i) {
6788 MemOps.push_back(
Store);
6797 if (!MemOps.empty()) {
6804 SDValue AArch64TargetLowering::LowerCallResult(
6811 for (
unsigned i = 0;
i != RVLocs.size(); ++
i) {
6816 if (
i == 0 && isThisReturn) {
6818 "unexpected return calling convention register assignment");
6819 InVals.push_back(ThisVal);
6853 InVals.push_back(Val);
6891 unsigned NumArgs = Outs.size();
6892 for (
unsigned i = 0;
i != NumArgs; ++
i) {
6893 MVT ArgVT = Outs[
i].VT;
6896 bool UseVarArgCC =
false;
6900 if (IsCalleeWin64) {
6903 UseVarArgCC = !Outs[
i].IsFixed;
6922 assert(!Res &&
"Call operand has unhandled type");
6927 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
6928 const CallLoweringInfo &CLI)
const {
6934 bool IsVarArg = CLI.IsVarArg;
6947 if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
6948 CallerAttrs.requiresLazySave(CalleeAttrs))
6959 bool CCMatch = CallerCC == CalleeCC;
6974 if (
i->hasByValAttr())
6983 if (
i->hasInRegAttr())
7001 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
7012 "Unexpected variadic calling convention");
7026 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
7027 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
7038 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
7042 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
7051 if (!ArgLoc.isRegLoc())
7063 A.getValVT().isScalableVector() ||
7065 "Expected value to be scalable");
7082 SDValue AArch64TargetLowering::addTokenForArgument(
SDValue Chain,
7085 int ClobberedFI)
const {
7088 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
7093 ArgChains.push_back(Chain);
7099 if (FI->getIndex() < 0) {
7101 int64_t InLastByte = InFirstByte;
7104 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
7105 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
7106 ArgChains.push_back(
SDValue(L, 1));
7113 bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
7114 bool TailCallOpt)
const {
7121 unsigned SizeInBits =
Arg.getValueType().getSizeInBits();
7125 APInt RequredZero(SizeInBits, 0xFE);
7127 bool ZExtBool = (
Bits.Zero & RequredZero) == RequredZero;
7144 Ops.push_back(InFlag);
7153 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
7162 bool &IsTailCall = CLI.IsTailCall;
7164 bool IsVarArg = CLI.IsVarArg;
7168 bool IsThisReturn =
false;
7172 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
7173 bool IsSibCall =
false;
7174 bool GuardWithBTI =
false;
7176 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
7177 !Subtarget->noBTIAtReturnTwice()) {
7186 unsigned NumArgs = Outs.size();
7188 for (
unsigned i = 0;
i != NumArgs; ++
i) {
7191 "currently not supported");
7202 RetCCInfo.AnalyzeCallResult(
Ins, RetCC);
7208 if (!Loc.isRegLoc())
7210 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
7211 AArch64::PPRRegClass.
contains(Loc.getLocReg());
7213 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
7219 IsTailCall = isEligibleForTailCallOptimization(CLI);
7231 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
7233 "site marked musttail");
7251 if (IsTailCall && !IsSibCall) {
7256 NumBytes =
alignTo(NumBytes, 16);
7261 FPDiff = NumReusableBytes - NumBytes;
7265 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
7273 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
7280 else if (std::optional<SMEAttrs>
Attrs =
7282 CalleeAttrs = *
Attrs;
7284 bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
7287 if (RequiresLazySave) {
7309 std::optional<bool> RequiresSMChange =
7310 CallerAttrs.requiresSMChange(CalleeAttrs);
7311 if (RequiresSMChange)
7312 PStateSM = getPStateSM(DAG, Chain, CallerAttrs,
DL,
MVT::i64);
7327 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
7329 for (
const auto &
F : Forwards) {
7336 unsigned ExtraArgLocs = 0;
7337 for (
unsigned i = 0,
e = Outs.size();
i !=
e; ++
i) {
7394 "Indirect arguments should be scalable on most subtargets");
7398 unsigned NumParts = 1;
7399 if (Outs[
i].
Flags.isInConsecutiveRegs()) {
7401 while (!Outs[
i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
7403 StoreSize *= NumParts;
7426 DL,
Ptr.getValueType(),
7427 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
7430 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
7431 Ptr.getValueType());
7434 Flags.setNoUnsignedWrap(
true);
7438 BytesIncrement, Flags);
7449 if (
i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
7452 "unexpected calling convention register assignment");
7454 "unexpected use of 'returned'");
7455 IsThisReturn =
true;
7464 [=](
const std::pair<unsigned, SDValue> &Elt) {
7479 if (RequiresSMChange && isa<FrameIndexSDNode>(
Arg))
7501 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
7503 OpSize = (OpSize + 7) / 8;
7505 !
Flags.isInConsecutiveRegs()) {
7507 BEAlign = 8 - OpSize;
7510 int32_t
Offset = LocMemOffset + BEAlign;
7524 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
7532 if (Outs[
i].
Flags.isByVal()) {
7536 Chain,
DL, DstAddr,
Arg, SizeNode,
7537 Outs[
i].
Flags.getNonZeroByValAlign(),
7541 MemOpChains.push_back(Cpy);
7551 MemOpChains.push_back(
Store);
7566 if (!MemOpChains.empty())
7570 if (RequiresSMChange) {
7572 InFlag, PStateSM,
true);
7579 for (
auto &RegToPass : RegsToPass) {
7581 RegToPass.second, InFlag);
7588 if (
auto *
G = dyn_cast<GlobalAddressSDNode>(Callee)) {
7589 auto GV =
G->getGlobal();
7599 }
else if (
auto *
S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
7602 const char *Sym =
S->getSymbol();
7606 const char *Sym =
S->getSymbol();
7615 if (IsTailCall && !IsSibCall) {
7620 std::vector<SDValue> Ops;
7621 Ops.push_back(Chain);
7622 Ops.push_back(Callee);
7633 for (
auto &RegToPass : RegsToPass)
7635 RegToPass.second.getValueType()));
7642 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
7644 IsThisReturn =
false;
7651 TRI->UpdateCustomCallPreservedMask(MF, &
Mask);
7653 if (
TRI->isAnyArgRegReserved(MF))
7654 TRI->emitReservedArgRegCallError(MF);
7656 assert(
Mask &&
"Missing call preserved mask for calling convention");
7660 Ops.push_back(InFlag);
7671 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
7683 "tail calls cannot be marked with clang.arc.attachedcall");
7690 Ops.insert(Ops.begin() + 1, GA);
7691 }
else if (GuardWithBTI)
7695 Chain = DAG.
getNode(CallOpc,
DL, NodeTys, Ops);
7705 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
7712 SDValue Result = LowerCallResult(Chain, InFlag, CallConv, IsVarArg, RVLocs,
7713 DL, DAG, InVals, IsThisReturn,
7714 IsThisReturn ? OutVals[0] :
SDValue());
7719 if (RequiresSMChange) {
7720 assert(PStateSM &&
"Expected a PStateSM to be set");
7725 if (RequiresLazySave) {
7735 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
7762 if (RequiresSMChange || RequiresLazySave) {
7763 for (
unsigned I = 0;
I < InVals.size(); ++
I) {
7780 bool AArch64TargetLowering::CanLowerReturn(
7807 for (
unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
7808 ++
i, ++realRVLocIdx) {
7842 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
7856 if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
7866 for (
auto &RetVal : RetVals) {
7870 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
7881 unsigned RetValReg = AArch64::X0;
7892 if (AArch64::GPR64RegClass.
contains(*
I))
7894 else if (AArch64::FPR64RegClass.
contains(*
I))
7905 RetOps.push_back(
Flag);
7916 unsigned Flag)
const {
7918 N->getOffset(),
Flag);
7923 unsigned Flag)
const {
7929 unsigned Flag)
const {
7931 N->getOffset(),
Flag);
7936 unsigned Flag)
const {
7941 template <
class NodeTy>
7943 unsigned Flags)
const {
7954 template <
class NodeTy>
7956 unsigned Flags)
const {
7970 template <
class NodeTy>
7972 unsigned Flags)
const {
7984 template <
class NodeTy>
7986 unsigned Flags)
const {
7990 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
8002 "unexpected offset in global node");
8007 return getGOT(GN, DAG, OpFlags);
8012 Result = getAddrLarge(GN, DAG, OpFlags);
8014 Result = getAddrTiny(GN, DAG, OpFlags);
8016 Result = getAddr(GN, DAG, OpFlags);
8056 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
8059 "This function expects a Darwin target");
8064 const GlobalValue *GV = cast<GlobalAddressSDNode>(
Op)->getGlobal();
8074 PtrMemVT,
DL, Chain, DescAddr,
8214 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
8230 AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
8246 "in local exec TLS model");
8262 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
8285 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
8309 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
8317 AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
8356 Chain =
TLS.getValue(1);
8382 return LowerDarwinGlobalTLSAddress(
Op, DAG);
8384 return LowerELFGlobalTLSAddress(
Op, DAG);
8386 return LowerWindowsGlobalTLSAddress(
Op, DAG);
8397 cast<VTSDNode>(Val.
getOperand(1))->getVT().getFixedSizeInBits() -
8419 bool ProduceNonFlagSettingCondBr =
8430 if (!
RHS.getNode()) {
8457 if (
LHS.getValueType().isInteger()) {
8464 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
8471 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
8487 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
8508 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
8546 if (!Subtarget->hasNEON())
8549 EVT VT =
Op.getValueType();
8577 return getSVESafeBitCast(VT,
Op, DAG);
8584 auto SetVecVal = [&](
int Idx = -1) {
8591 VecVal1 = BitCast(VecVT, In1, DAG);
8592 VecVal2 = BitCast(VecVT, In2, DAG);
8600 SetVecVal(AArch64::dsub);
8603 SetVecVal(AArch64::ssub);
8606 SetVecVal(AArch64::hsub);
8633 return BitCast(VT,
BSP, DAG);
8639 Attribute::NoImplicitFloat))
8642 if (!Subtarget->hasNEON())
8648 EVT VT =
Op.getValueType();
8695 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
8704 "Unexpected type for custom ctpop lowering");
8711 unsigned EltSize = 8;
8726 EVT VT =
Op.getValueType();
8739 EVT VT =
Op.getValueType();
8741 unsigned Opcode =
Op.getOpcode();
8785 EVT VT =
Op.getValueType();
8836 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
8842 N =
N->getOperand(0);
8846 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
8852 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
8866 EVT VT =
N->getValueType(0);
8876 unsigned NumXors = 0;
8881 std::tie(XOR0, XOR1) = WorkList[0];
8884 for (
unsigned I = 1;
I < WorkList.size();
I++) {
8885 std::tie(XOR0, XOR1) = WorkList[
I];
8887 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
8899 if (
Op.getValueType().isVector())
8900 return LowerVSETCC(
Op, DAG);
8902 bool IsStrict =
Op->isStrictFPOpcode();
8904 unsigned OpNo = IsStrict ? 1 : 0;
8907 Chain =
Op.getOperand(0);
8914 EVT VT =
Op.getValueType();
8925 if (!
RHS.getNode()) {
8927 "Unexpected setcc expansion!");
8932 if (
LHS.getValueType().isInteger()) {
8990 EVT VT =
LHS.getValueType();
9001 EVT OpVT =
Op.getValueType();
9026 if (!
RHS.getNode()) {
9033 if (
LHS.getValueType() ==
MVT::f16 && !Subtarget->hasFullFP16()) {
9039 if (
LHS.getValueType().isInteger()) {
9052 EVT VT =
LHS.getValueType();
9067 }
else if (CTVal && CFVal && CTVal->
isOne() && CFVal->
isZero()) {
9087 }
else if (CTVal && CFVal) {
9109 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
9112 if (TrueVal32 > FalseVal32) {
9121 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
9157 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
9160 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
9174 return DAG.
getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
9193 if (RHSVal && RHSVal->
isZero()) {
9201 CFVal && CFVal->
isZero() &&
9224 EVT Ty =
Op.getValueType();
9225 auto Idx =
Op.getConstantOperandAPInt(2);
9226 int64_t IdxVal = Idx.getSExtValue();
9228 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
9237 std::optional<unsigned> PredPattern;
9270 return LowerSELECT_CC(
CC,
LHS,
RHS, TVal, FVal,
DL, DAG);
9280 EVT Ty =
Op.getValueType();
9364 return getAddrLarge(
JT, DAG);
9366 return getAddrTiny(
JT, DAG);
9368 return getAddr(
JT, DAG);
9378 int JTI = cast<JumpTableSDNode>(
JT.getNode())->getIndex();
9397 return getGOT(
CP, DAG);
9399 return getAddrLarge(
CP, DAG);
9401 return getAddrTiny(
CP, DAG);
9403 return getAddr(
CP, DAG);
9412 return getAddrLarge(BA, DAG);
9414 return getAddrTiny(BA, DAG);
9416 return getAddr(BA, DAG);
9428 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
9459 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
9477 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
9484 MemOps.push_back(DAG.
getStore(Chain,
DL, Stack, VAList,
9501 MemOps.push_back(DAG.
getStore(Chain,
DL, GRTop, GRTopAddr,
9519 MemOps.push_back(DAG.
getStore(Chain,
DL, VRTop, VRTopAddr,
9548 return LowerWin64_VASTART(
Op, DAG);
9550 return LowerDarwin_VASTART(
Op, DAG);
9552 return LowerAAPCS_VASTART(
Op, DAG);
9561 unsigned VaListSize =
9565 const Value *DestSV = cast<SrcValueSDNode>(
Op.getOperand(3))->getValue();
9566 const Value *SrcSV = cast<SrcValueSDNode>(
Op.getOperand(4))->getValue();
9570 Align(PtrSize),
false,
false,
false,
9576 "automatic va_arg instruction only works on Darwin");
9578 const Value *V = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
9579 EVT VT =
Op.getValueType();
9594 "currently not supported");
9611 ArgSize =
std::max(ArgSize, MinSlotSize);
9612 bool NeedFPTrunc =
false;
9649 EVT VT =
Op.getValueType();
9651 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
9675 #define GET_REGISTER_MATCHER
9676 #include "AArch64GenAsmMatcher.inc"
9683 if (AArch64::X1 <=
Reg &&
Reg <= AArch64::X28) {
9685 unsigned DwarfRegNum =
MRI->getDwarfRegNum(
Reg,
false);
9699 EVT VT =
Op.getValueType();
9715 EVT VT =
Op.getValueType();
9717 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
9720 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
9737 if (Subtarget->hasPAuth()) {
9765 bool OptForSize)
const {
9766 bool IsLegal =
false;
9770 const APInt ImmInt =
Imm.bitcastToAPInt();
9793 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
9794 IsLegal =
Insn.size() <= Limit;
9798 <<
" imm value: ";
Imm.dump(););
9810 if ((
ST->hasNEON() &&
9816 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
9824 return DAG.
getNode(Opcode,
SDLoc(Operand), VT, Operand);
9834 EVT VT =
Op.getValueType();
9841 AArch64TargetLowering::getSqrtResultForDenormInput(
SDValue Op,
9850 bool Reciprocal)
const {
9852 (
Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
9859 Flags.setAllowReassociation(
true);
9863 for (
int i = ExtraSteps;
i > 0; --
i) {
9881 int &ExtraSteps)
const {
9889 Flags.setAllowReassociation(
true);
9893 for (
int i = ExtraSteps;
i > 0; --
i) {
9933 const char *AArch64TargetLowering::LowerXConstraint(
EVT ConstraintVT)
const {
9941 if (!Subtarget->hasFPARMv8())
9963 if (Constraint ==
"Upa")
9965 if (Constraint ==
"Upl")
9973 AArch64TargetLowering::getConstraintType(
StringRef Constraint)
const {
9974 if (Constraint.
size() == 1) {
9975 switch (Constraint[0]) {
10009 AArch64TargetLowering::getSingleConstraintMatchWeight(
10010 AsmOperandInfo &
info,
const char *constraint)
const {
10012 Value *CallOperandVal =
info.CallOperandVal;
10015 if (!CallOperandVal)
10019 switch (*constraint) {
10026 if (
type->isFloatingPointTy() ||
type->isVectorTy())
10040 std::pair<unsigned, const TargetRegisterClass *>
10041 AArch64TargetLowering::getRegForInlineAsmConstraint(
10043 if (Constraint.
size() == 1) {
10044 switch (Constraint[0]) {
10047 return std::make_pair(0U,
nullptr);
10049 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
10051 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
10052 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
10054 if (!Subtarget->hasFPARMv8())
10058 return std::make_pair(0U, &AArch64::ZPRRegClass);
10059 return std::make_pair(0U,
nullptr);
10063 return std::make_pair(0U, &AArch64::FPR16RegClass);
10065 return std::make_pair(0U, &AArch64::FPR32RegClass);
10067 return std::make_pair(0U, &AArch64::FPR64RegClass);
10069 return std::make_pair(0U, &AArch64::FPR128RegClass);
10075 if (!Subtarget->hasFPARMv8())
10078 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
10080 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
10083 if (!Subtarget->hasFPARMv8())
10086 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
10093 return std::make_pair(0U,
nullptr);
10095 return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
10096 :
std::make_pair(0U, &AArch64::PPRRegClass);
10099 if (
StringRef(
"{cc}").equals_insensitive(Constraint))
10100 return std::make_pair(
unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
10104 std::pair<unsigned, const TargetRegisterClass *> Res;
10109 unsigned Size = Constraint.
size();
10110 if ((Size == 4 || Size == 5) && Constraint[0] ==
'{' &&
10111 tolower(Constraint[1]) ==
'v' && Constraint[Size - 1] ==
'}') {
10114 if (!
Failed && RegNo >= 0 && RegNo <= 31) {
10119 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
10120 Res.second = &AArch64::FPR64RegClass;
10122 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
10123 Res.second = &AArch64::FPR128RegClass;
10129 if (Res.second && !Subtarget->hasFPARMv8() &&
10130 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
10131 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
10132 return std::make_pair(0U,
nullptr);
10139 bool AllowUnknown)
const {
10140 if (Subtarget->hasLS64() && Ty->
isIntegerTy(512))
10148 void AArch64TargetLowering::LowerAsmOperandForConstraint(
10149 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
10154 if (Constraint.length() != 1)
10157 char ConstraintLetter = Constraint[0];
10158 switch (ConstraintLetter) {
10181 dyn_cast<BlockAddressSDNode>(
Op)) {
10201 switch (ConstraintLetter) {
10209 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
10214 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
10215 CVal =
C->getSExtValue();
10242 if (!isUInt<32>(CVal))
10246 if ((CVal & 0xFFFF) == CVal)
10248 if ((CVal & 0xFFFF0000ULL) == CVal)
10251 if ((NCVal & 0xFFFFULL) == NCVal)
10253 if ((NCVal & 0xFFFF0000ULL) == NCVal)
10260 if ((CVal & 0xFFFFULL) == CVal)
10262 if ((CVal & 0xFFFF0000ULL) == CVal)
10264 if ((CVal & 0xFFFF00000000ULL) == CVal)
10266 if ((CVal & 0xFFFF000000000000ULL) == CVal)
10269 if ((NCVal & 0xFFFFULL) == NCVal)
10271 if ((NCVal & 0xFFFF0000ULL) == NCVal)
10273 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
10275 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
10289 Ops.push_back(Result);
10337 LLVM_DEBUG(
dbgs() <<
"AArch64TargetLowering::ReconstructShuffle\n");
10339 EVT VT =
Op.getValueType();
10341 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
10344 struct ShuffleSourceInfo {
10359 ShuffleSourceInfo(
SDValue Vec)
10361 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
10369 for (
unsigned i = 0;
i < NumElts; ++
i) {
10377 dbgs() <<
"Reshuffle failed: "
10378 "a shuffle can only come from building a vector from "
10379 "various elements of other fixed-width vectors, provided "
10380 "their indices are constant\n");
10387 if (
Source == Sources.end())
10388 Source = Sources.
insert(Sources.end(), ShuffleSourceInfo(SourceVec));
10391 unsigned EltNo = cast<ConstantSDNode>(V.
getOperand(1))->getZExtValue();
10398 if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
10403 for (
unsigned I = 0;
I < NumElts; ++
I) {
10406 for (
unsigned OF = 0; OF < OutputFactor; OF++)
10407 Mask.push_back(-1);
10414 for (
unsigned S = 0;
S < Sources.size();
S++) {
10416 unsigned InputSize = Sources[
S].Vec.getScalarValueSizeInBits();
10417 unsigned InputBase = 16 *
S + Lane * InputSize / 8;
10418 for (
unsigned OF = 0; OF < OutputFactor; OF++)
10419 Mask.push_back(InputBase + OF);
10428 TBLOperands.push_back(DAG.
getConstant(Sources.size() == 3
10429 ? Intrinsic::aarch64_neon_tbl3
10430 : Intrinsic::aarch64_neon_tbl4,
10432 for (
unsigned i = 0;
i < Sources.size();
i++) {
10434 EVT SrcVT = Src.getValueType();
10437 "Expected a legally typed vector");
10441 TBLOperands.push_back(Src);
10445 for (
unsigned i = 0;
i <
Mask.size();
i++)
10448 "Expected a v8i8 or v16i8 Mask");
10449 TBLOperands.push_back(
10458 if (Sources.size() > 2) {
10459 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
10460 <<
"sensible when at most two source vectors are "
10468 for (
auto &
Source : Sources) {
10469 EVT SrcEltTy =
Source.Vec.getValueType().getVectorElementType();
10470 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
10471 SmallestEltTy = SrcEltTy;
10474 unsigned ResMultiplier =
10483 for (
auto &Src : Sources) {
10484 EVT SrcVT = Src.ShuffleVec.getValueType();
10497 assert(2 * SrcVTSize == VTSize);
10502 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
10508 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
10512 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
10514 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
10518 if (Src.MinElt >= NumSrcElts) {
10523 Src.WindowBase = -NumSrcElts;
10524 }
else if (Src.MaxElt < NumSrcElts) {
10541 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
10542 "for SVE vectors.");
10549 Src.WindowBase = -Src.MinElt;
10556 for (
auto &Src : Sources) {
10557 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
10558 if (SrcEltTy == SmallestEltTy)
10564 Src.WindowBase *= Src.WindowScale;
10570 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
10577 if (Entry.isUndef())
10580 auto Src =
find(Sources, Entry.getOperand(0));
10581 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
10586 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
10589 int LanesDefined = BitsDefined / BitsPerShuffleLane;
10593 int *LaneMask = &
Mask[
i * ResMultiplier];
10595 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
10596 ExtractBase += NumElts * (Src - Sources.begin());
10597 for (
int j = 0;
j < LanesDefined; ++
j)
10598 LaneMask[
j] = ExtractBase +
j;
10603 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
10608 for (
unsigned i = 0;
i < Sources.size(); ++
i)
10616 dbgs() <<
"Reshuffle, creating node: "; V.
dump(););
10635 unsigned ExpectedElt =
Imm;
10636 for (
unsigned i = 1;
i < NumElts; ++
i) {
10640 if (ExpectedElt == NumElts)
10645 if (ExpectedElt !=
static_cast<unsigned>(
M[
i]))
10660 for (
unsigned X = 0;
X < 4;
X++) {
10667 !isa<ConstantSDNode>(BaseExt.
getOperand(1)) ||
10672 for (
unsigned Y = 1;
Y < 4;
Y++) {
10676 !isa<ConstantSDNode>(
Ext.getOperand(1)) ||
10677 Ext.getConstantOperandVal(1) !=
Y)
10707 unsigned &DupLaneOp) {
10709 "Only possible block sizes for wide DUP are: 16, 32, 64");
10728 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
10729 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
10730 int Elt =
M[BlockIndex * NumEltsPerBlock +
I];
10734 if ((
unsigned)Elt >= SingleVecNumElements)
10736 if (BlockElts[
I] < 0)
10737 BlockElts[
I] = Elt;
10738 else if (BlockElts[
I] != Elt)
10747 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
10748 assert(FirstRealEltIter != BlockElts.end() &&
10749 "Shuffle with all-undefs must have been caught by previous cases, "
10751 if (FirstRealEltIter == BlockElts.end()) {
10757 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
10759 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
10762 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
10765 if (Elt0 % NumEltsPerBlock != 0)
10769 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
10770 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
10773 DupLaneOp = Elt0 / NumEltsPerBlock;
10782 const int *FirstRealElt =
find_if(
M, [](
int Elt) {
return Elt >= 0; });
10787 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1);
10790 bool FoundWrongElt =
std::any_of(FirstRealElt + 1,
M.end(), [&](
int Elt) {
10791 return Elt != ExpectedElt++ && Elt != -1;
10824 "Only possible block sizes for REV are: 16, 32, 64, 128");
10828 unsigned BlockElts =
M[0] + 1;
10836 for (
unsigned i = 0;
i < NumElts; ++
i) {
10839 if ((
unsigned)
M[
i] != (
i -
i % BlockElts) + (BlockElts - 1 -
i % BlockElts))
10848 if (NumElts % 2 != 0)
10850 WhichResult = (
M[0] == 0 ? 0 : 1);
10851 unsigned Idx = WhichResult * NumElts / 2;
10852 for (
unsigned i = 0;
i != NumElts;
i += 2) {
10853 if ((
M[
i] >= 0 && (
unsigned)
M[
i] != Idx) ||
10854 (
M[
i + 1] >= 0 && (
unsigned)
M[
i + 1] != Idx + NumElts))
10864 WhichResult = (
M[0] == 0 ? 0 : 1);
10865 for (
unsigned i = 0;
i != NumElts; ++
i) {
10868 if ((
unsigned)
M[
i] != 2 *
i + WhichResult)
10877 if (NumElts % 2 != 0)
10879 WhichResult = (
M[0] == 0 ? 0 : 1);
10880 for (
unsigned i = 0;
i < NumElts;
i += 2) {
10881 if ((
M[
i] >= 0 && (
unsigned)
M[
i] !=
i + WhichResult) ||
10882 (
M[
i + 1] >= 0 && (
unsigned)
M[
i + 1] !=
i + NumElts + WhichResult))
10893 if (NumElts % 2 != 0)
10895 WhichResult = (
M[0] == 0 ? 0 : 1);
10896 unsigned Idx = WhichResult * NumElts / 2;
10897 for (
unsigned i = 0;
i != NumElts;
i += 2) {
10898 if ((
M[
i] >= 0 && (
unsigned)
M[
i] != Idx) ||
10912 WhichResult = (
M[0] == 0 ? 0 : 1);
10913 for (
unsigned j = 0;
j != 2; ++
j) {
10914 unsigned Idx = WhichResult;
10915 for (
unsigned i = 0;
i != Half; ++
i) {
10916 int MIdx =
M[
i +
j * Half];
10917 if (MIdx >= 0 && (
unsigned)MIdx != Idx)
10931 if (NumElts % 2 != 0)
10933 WhichResult = (
M[0] == 0 ? 0 : 1);
10934 for (
unsigned i = 0;
i < NumElts;
i += 2) {
10935 if ((
M[
i] >= 0 && (
unsigned)
M[
i] !=
i + WhichResult) ||
10943 bool &DstIsLeft,
int &Anomaly) {
10944 if (
M.size() !=
static_cast<size_t>(NumInputElements))
10947 int NumLHSMatch = 0, NumRHSMatch = 0;
10948 int LastLHSMismatch = -1, LastRHSMismatch = -1;
10950 for (
int i = 0;
i < NumInputElements; ++
i) {
10960 LastLHSMismatch =
i;
10962 if (
M[
i] ==
i + NumInputElements)
10965 LastRHSMismatch =
i;
10968 if (NumLHSMatch == NumInputElements - 1) {
10970 Anomaly = LastLHSMismatch;
10972 }
else if (NumRHSMatch == NumInputElements - 1) {
10974 Anomaly = LastRHSMismatch;
10987 for (
int I = 0,
E = NumElts / 2;
I !=
E;
I++) {
10992 int Offset = NumElts / 2;
10993 for (
int I = NumElts / 2,
E = NumElts;
I !=
E;
I++) {
11003 EVT VT =
Op.getValueType();
11038 unsigned OpNum = (PFEntry >> 26) & 0x0F;
11039 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
11040 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
11062 if (LHSID == (1 * 9 + 2) * 9 + 3)
11064 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 &&
"Illegal OP_COPY!");
11068 if (OpNum == OP_MOVLANE) {
11070 auto getPFIDLane = [](
unsigned ID,
int Elt) ->
int {
11071 assert(Elt < 4 &&
"Expected Perfect Lanes to be less than 4");
11077 return (
ID % 9 == 8) ? -1 :
ID % 9;
11086 assert(RHSID < 8 &&
"Expected a lane index for RHSID!");
11087 unsigned ExtLane = 0;
11093 int MaskElt = getPFIDLane(
ID, (RHSID & 0x01) << 1) >> 1;
11095 MaskElt = (getPFIDLane(
ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
11096 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
11097 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
11098 Input = MaskElt < 2 ? V1 :
V2;
11104 "Expected 16 or 32 bit shuffle elemements");
11109 int MaskElt = getPFIDLane(
ID, RHSID);
11110 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
11111 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
11112 Input = MaskElt < 4 ? V1 :
V2;
11171 return DAG.
getNode(Opcode, dl, VT, OpLHS, Lane);
11208 EVT EltVT =
Op.getValueType().getVectorElementType();
11222 unsigned IndexLen = 8;
11223 if (
Op.getValueSizeInBits() == 128) {
11229 for (
int Val : ShuffleMask) {
11230 for (
unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
11231 unsigned Offset = Byte + Val * BytesPerElt;
11234 if (IsUndefOrZero &&
Offset >= IndexLen)
11244 if (IsUndefOrZero) {
11252 if (IndexLen == 8) {
11291 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
11302 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
11304 if (ExtIdxInBits % CastedEltBitWidth != 0)
11312 LaneC += ExtIdxInBits / CastedEltBitWidth;
11319 unsigned SrcVecNumElts =
11326 if (getScaledOffsetDup(V, Lane, CastVT)) {
11353 if (NumElts % 2 != 0)
11357 for (
unsigned i = 0;
i < NumElts;
i += 2) {
11362 if (
M0 == -1 &&
M1 == -1) {
11363 NewMask.push_back(-1);
11367 if (
M0 == -1 &&
M1 != -1 && (
M1 % 2) == 1) {
11368 NewMask.push_back(
M1 / 2);
11372 if (
M0 != -1 && (
M0 % 2) == 0 && ((
M0 + 1) ==
M1 ||
M1 == -1)) {
11373 NewMask.push_back(
M0 / 2);
11381 assert(NewMask.size() == NumElts / 2 &&
"Incorrect size for mask!");
11397 EVT VT =
Op.getValueType();
11407 if (ElementSize > 32 || ElementSize == 1)
11437 EVT VT =
Op.getValueType();
11451 for (
unsigned I = 0;
I < 16;
I++) {
11452 if (ShuffleMask[
I] < 16)
11456 dyn_cast<ConstantSDNode>(Mask2->
getOperand(ShuffleMask[
I] - 16));
11476 AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
11479 EVT VT =
Op.getValueType();
11483 "Unexpected extension factor.");
11496 EVT VT =
Op.getValueType();
11502 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
11515 "Unexpected VECTOR_SHUFFLE mask size!");
11541 for (
unsigned LaneSize : {64U, 32U, 16U}) {
11553 V1 =
constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
11574 bool ReverseEXT =
false;
11588 unsigned WhichResult;
11589 if (
isZIPMask(ShuffleMask, VT, WhichResult)) {
11593 if (
isUZPMask(ShuffleMask, VT, WhichResult)) {
11597 if (
isTRNMask(ShuffleMask, VT, WhichResult)) {
11621 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
11626 int SrcLane = ShuffleMask[Anomaly];
11627 if (SrcLane >= NumInputElements) {
11650 if (NumElts == 4) {
11651 unsigned PFIndexes[4];
11652 for (
unsigned i = 0;
i != 4; ++
i) {
11653 if (ShuffleMask[
i] < 0)
11656 PFIndexes[
i] = ShuffleMask[
i];
11660 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
11661 PFIndexes[2] * 9 + PFIndexes[3];
11672 EVT VT =
Op.getValueType();
11676 return LowerToScalableOp(
Op, DAG);
11679 "Unexpected vector type!");
11682 if (isa<ConstantSDNode>(
Op.getOperand(0)))
11706 EVT VT =
Op.getValueType();
11718 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
11719 if (CIdx && (CIdx->getZExtValue() <= 3)) {
11749 APInt &UndefBits) {
11751 APInt SplatBits, SplatUndef;
11752 unsigned SplatBitSize;
11754 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
11757 for (
unsigned i = 0;
i < NumSplats; ++
i) {
11758 CnstBits <<= SplatBitSize;
11759 UndefBits <<= SplatBitSize;
11761 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
11773 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
11775 EVT VT =
Op.getValueType();
11795 EVT VT =
Op.getValueType();
11800 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
11803 bool isAdvSIMDModImm =
false;
11823 if (isAdvSIMDModImm) {
11832 Mov = DAG.
getNode(NewOp, dl, MovTy,
11847 EVT VT =
Op.getValueType();
11852 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
11855 bool isAdvSIMDModImm =
false;
11867 if (isAdvSIMDModImm) {
11876 Mov = DAG.
getNode(NewOp, dl, MovTy,
11890 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
11892 EVT VT =
Op.getValueType();
11894 bool isAdvSIMDModImm =
false;
11906 if (isAdvSIMDModImm) {
11921 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
11923 EVT VT =
Op.getValueType();
11942 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
11944 EVT VT =
Op.getValueType();
11947 bool isAdvSIMDModImm =
false;
11959 if (isAdvSIMDModImm) {
11983 for (
unsigned i = 1;
i < NumElts; ++
i)
11984 if (dyn_cast<ConstantSDNode>(Bvec->
getOperand(
i)) != FirstElt)
11997 EVT VT =
N->getValueType(0);
12007 SDValue FirstOp =
N->getOperand(0);
12008 unsigned FirstOpc = FirstOp.
getOpcode();
12009 SDValue SecondOp =
N->getOperand(1);
12010 unsigned SecondOpc = SecondOp.
getOpcode();
12045 assert(C1nodeImm && C1nodeShift);
12054 if (C2 > ElemSizeInBits)
12057 APInt C1AsAPInt(ElemSizeInBits,
C1);
12060 if (C1AsAPInt != RequiredC1)
12082 return LowerToScalableOp(
Op, DAG);
12088 EVT VT =
Op.getValueType();
12092 dyn_cast<BuildVectorSDNode>(
Op.getOperand(1).getNode());
12095 LHS =
Op.getOperand(1);
12096 BVN = dyn_cast<BuildVectorSDNode>(
Op.getOperand(0).getNode());
12113 UndefBits, &
LHS)) ||
12129 EVT VT =
Op.getValueType();
12141 if (
auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
12143 CstLane->getZExtValue());
12145 }
else if (Lane.getNode()->isUndef()) {
12149 "Unexpected BUILD_VECTOR operand type");
12151 Ops.push_back(Lane);
12157 EVT VT =
Op.getValueType();
12172 DefBits = ~DefBits;
12178 DefBits = UndefBits;
12187 DefBits = ~UndefBits;
12199 EVT VT =
Op.getValueType();
12203 if (
auto SeqInfo = cast<BuildVectorSDNode>(
Op)->isConstantSequence()) {
12233 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
12234 if (Val.isZero() || Val.isAllOnes())
12256 bool isOnlyLowElement =
true;
12257 bool usesOnlyOneValue =
true;
12258 bool usesOnlyOneConstantValue =
true;
12260 bool AllLanesExtractElt =
true;
12261 unsigned NumConstantLanes = 0;
12262 unsigned NumDifferentLanes = 0;
12263 unsigned NumUndefLanes = 0;
12266 for (
unsigned i = 0;
i < NumElts; ++
i) {
12269 AllLanesExtractElt =
false;
12275 isOnlyLowElement =
false;
12280 ++NumConstantLanes;
12281 if (!ConstantValue.
getNode())
12283 else if (ConstantValue != V)
12284 usesOnlyOneConstantValue =
false;
12287 if (!
Value.getNode())
12289 else if (V !=
Value) {
12290 usesOnlyOneValue =
false;
12291 ++NumDifferentLanes;
12295 if (!
Value.getNode()) {
12297 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
12305 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
12306 "SCALAR_TO_VECTOR node\n");
12310 if (AllLanesExtractElt) {
12316 for (
unsigned i = 0;
i < NumElts; ++
i) {
12319 if (!isa<ConstantSDNode>(
N->getOperand(1)))
12331 }
else if (Vector != N0.
getNode()) {
12339 uint64_t Val =
N->getConstantOperandVal(1);
12340 if (Val == 2 *
i) {
12344 if (Val - 1 == 2 *
i) {
12373 if (usesOnlyOneValue) {
12376 Value.getValueType() != VT) {
12378 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
12386 if (
Value.getValueSizeInBits() == 64) {
12388 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
12401 EltTy ==
MVT::f64) &&
"Unsupported floating-point vector type");
12403 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
12404 "BITCASTS, and try again\n");
12406 for (
unsigned i = 0;
i < NumElts; ++
i)
12410 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
12412 Val = LowerBUILD_VECTOR(Val, DAG);
12422 bool PreferDUPAndInsert =
12424 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
12425 NumDifferentLanes >= NumConstantLanes;
12431 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
12442 for (
unsigned i = 0;
i < NumElts; ++
i) {
12456 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
12468 if (NumElts >= 4) {
12473 if (PreferDUPAndInsert) {
12478 for (
unsigned I = 0;
I < NumElts; ++
I)
12495 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
12496 "of INSERT_VECTOR_ELT\n");
12513 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
12518 <<
"Creating nodes for the other vector elements:\n";);
12519 for (;
i < NumElts; ++
i) {
12530 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
12531 "better alternative\n");
12539 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
12541 assert(
Op.getValueType().isScalableVector() &&
12543 "Expected legal scalable vector type!");
12546 unsigned NumOperands =
Op->getNumOperands();
12548 "Unexpected number of operands in CONCAT_VECTORS");
12550 if (NumOperands == 2)
12555 while (ConcatOps.size() > 1) {
12556 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
12564 ConcatOps.resize(ConcatOps.size() / 2);
12566 return ConcatOps[0];
12578 return LowerFixedLengthInsertVectorElt(
Op, DAG);
12581 EVT VT =
Op.getOperand(0).getValueType();
12595 ExtendedValue,
Op.getOperand(2));
12621 Op.getOperand(1),
Op.getOperand(2));
12627 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
12630 EVT VT =
Op.getOperand(0).getValueType();
12641 Extend,
Op.getOperand(1));
12647 return LowerFixedLengthExtractVectorElt(
Op, DAG);
12682 assert(
Op.getValueType().isFixedLengthVector() &&
12683 "Only cases that extract a fixed length vector are supported!");
12685 EVT InVT =
Op.getOperand(0).getValueType();
12686 unsigned Idx = cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue();
12687 unsigned Size =
Op.getValueSizeInBits();
12729 assert(
Op.getValueType().isScalableVector() &&
12730 "Only expect to lower inserts into scalable vectors!");
12732 EVT InVT =
Op.getOperand(1).getValueType();
12733 unsigned Idx = cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue();
12738 EVT VT =
Op.getValueType();
12754 if (Idx < (NumElts / 2)) {
12778 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
12779 Vec1 = getSVESafeBitCast(WideVT, Vec1, DAG);
12794 "Invalid subvector index!");
12799 return getSVESafeBitCast(VT, Narrow, DAG);
12807 std::optional<unsigned> PredPattern =
12829 !isa<ConstantSDNode>(
Op->getOperand(0)))
12832 SplatVal =
Op->getConstantOperandVal(0);
12833 if (
Op.getValueType().getVectorElementType() !=
MVT::i64)
12834 SplatVal = (int32_t)SplatVal;
12842 SplatVal = -SplatVal;
12850 EVT VT =
Op.getValueType();
12854 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
12875 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
12893 SDValue ResultLo = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
12894 SDValue ResultHi = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
12913 unsigned DummyUnsigned;
12917 isEXTMask(
M, VT, DummyBool, DummyUnsigned) ||
12940 Op =
Op.getOperand(0);
12942 APInt SplatBits, SplatUndef;
12943 unsigned SplatBitSize;
12945 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
12946 HasAnyUndefs, ElementBits) ||
12947 SplatBitSize > ElementBits)
12958 assert(VT.
isVector() &&
"vector shift count is not a vector type");
12962 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
12969 assert(VT.
isVector() &&
"vector shift count is not a vector type");
12973 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
12978 EVT VT =
Op.getValueType();
12983 EVT OpVT =
Op.getOperand(0).getValueType();
12995 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
13002 EVT VT =
Op.getValueType();
13006 if (!
Op.getOperand(1).getValueType().isVector())
13010 switch (
Op.getOpcode()) {
13017 if (
isVShiftLImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize)
13023 Op.getOperand(0),
Op.getOperand(1));
13031 return LowerToPredicatedOp(
Op, DAG, Opc);
13035 if (
isVShiftRImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize) {
13038 return DAG.
getNode(Opc,
DL, VT,
Op.getOperand(0),
13045 unsigned Opc = (
Op.getOpcode() ==
ISD::SRA) ? Intrinsic::aarch64_neon_sshl
13046 : Intrinsic::aarch64_neon_ushl;
13054 return NegShiftLeft;
13063 EVT SrcVT =
LHS.getValueType();
13065 "function only supposed to emit natural comparisons");
13071 bool IsZero = IsCnst && (CnstBits == 0);
13083 return DAG.
getNOT(dl, Fcmeq, VT);
13127 return DAG.
getNOT(dl, Cmeq, VT);
13162 if (
Op.getValueType().isScalableVector())
13167 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
13172 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
13175 if (
LHS.getValueType().getVectorElementType().isInteger()) {
13187 if (!FullFP16 &&
LHS.getValueType().getVectorElementType() ==
MVT::f16) {
13188 if (
LHS.getValueType().getVectorNumElements() == 4) {
13199 LHS.getValueType().getVectorElementType() !=
MVT::f128);
13210 if (!
Cmp.getNode())
13243 EVT SrcVT = Src.getValueType();
13256 return LowerPredReductionToSVE(
Op, DAG);
13258 switch (
Op.getOpcode()) {
13288 switch (
Op.getOpcode()) {
13319 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
13324 MVT VT =
Op.getSimpleValueType();
13329 Op.getOperand(0),
Op.getOperand(1),
RHS,
13337 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
13342 MVT VT =
Op.getSimpleValueType();
13348 Op.getOperand(0),
Op.getOperand(1),
RHS,
13352 SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
13382 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
13385 "Only Windows alloca probing supported");
13392 cast<ConstantSDNode>(
Op.getOperand(2))->getMaybeAlignValue();
13393 EVT VT = Node->getValueType(0);
13396 "no-stack-arg-probe")) {
13404 SDValue Ops[2] = {SP, Chain};
13410 Chain = LowerWindowsDYNAMIC_STACKALLOC(
Op, Chain, Size, DAG);
13422 SDValue Ops[2] = {SP, Chain};
13428 EVT VT =
Op.getValueType();
13432 APInt MulImm = cast<ConstantSDNode>(
Op.getOperand(0))->getAPIntValue();
13438 template <
unsigned NumVecs>
13441 AArch64TargetLowering::IntrinsicInfo &
Info,
const CallInst &CI) {
13448 for (
unsigned I = 0;
I < NumVecs; ++
I)
13457 Info.align.reset();
13468 unsigned Intrinsic)
const {
13469 auto &
DL =
I.getModule()->getDataLayout();
13470 switch (Intrinsic) {
13471 case Intrinsic::aarch64_sve_st2:
13472 return setInfoSVEStN<2>(*
this,
DL,
Info,
I);
13473 case Intrinsic::aarch64_sve_st3:
13474 return setInfoSVEStN<3>(*
this,
DL,
Info,
I);
13475 case Intrinsic::aarch64_sve_st4:
13476 return setInfoSVEStN<4>(*
this,
DL,
Info,
I);
13477 case Intrinsic::aarch64_neon_ld2:
13478 case Intrinsic::aarch64_neon_ld3:
13479 case Intrinsic::aarch64_neon_ld4:
13480 case Intrinsic::aarch64_neon_ld1x2:
13481 case Intrinsic::aarch64_neon_ld1x3:
13482 case Intrinsic::aarch64_neon_ld1x4:
13483 case Intrinsic::aarch64_neon_ld2lane:
13484 case Intrinsic::aarch64_neon_ld3lane:
13485 case Intrinsic::aarch64_neon_ld4lane:
13486 case Intrinsic::aarch64_neon_ld2r:
13487 case Intrinsic::aarch64_neon_ld3r:
13488 case Intrinsic::aarch64_neon_ld4r: {
13491 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
13493 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
13495 Info.align.reset();
13500 case Intrinsic::aarch64_neon_st2:
13501 case Intrinsic::aarch64_neon_st3:
13502 case Intrinsic::aarch64_neon_st4:
13503 case Intrinsic::aarch64_neon_st1x2:
13504 case Intrinsic::aarch64_neon_st1x3:
13505 case Intrinsic::aarch64_neon_st1x4:
13506 case Intrinsic::aarch64_neon_st2lane:
13507 case Intrinsic::aarch64_neon_st3lane:
13508 case Intrinsic::aarch64_neon_st4lane: {
13511 unsigned NumElts = 0;
13513 Type *ArgTy =
Arg->getType();
13516 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
13519 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
13521 Info.align.reset();
13526 case Intrinsic::aarch64_ldaxr:
13527 case Intrinsic::aarch64_ldxr: {
13528 Type *ValTy =
I.getParamElementType(0);
13531 Info.ptrVal =
I.getArgOperand(0);
13533 Info.align =
DL.getABITypeAlign(ValTy);
13537 case Intrinsic::aarch64_stlxr:
13538 case Intrinsic::aarch64_stxr: {
13539 Type *ValTy =
I.getParamElementType(1);
13542 Info.ptrVal =
I.getArgOperand(1);
13544 Info.align =
DL.getABITypeAlign(ValTy);
13548 case Intrinsic::aarch64_ldaxp:
13549 case Intrinsic::aarch64_ldxp:
13552 Info.ptrVal =
I.getArgOperand(0);
13557 case Intrinsic::aarch64_stlxp:
13558 case Intrinsic::aarch64_stxp:
13561 Info.ptrVal =
I.getArgOperand(2);
13566 case Intrinsic::aarch64_sve_ldnt1: {
13567 Type *ElTy = cast<VectorType>(
I.getType())->getElementType();
13570 Info.ptrVal =
I.getArgOperand(1);
13572 Info.align =
DL.getABITypeAlign(ElTy);
13576 case Intrinsic::aarch64_sve_stnt1: {
13578 cast<VectorType>(
I.getArgOperand(0)->getType())->getElementType();
13581 Info.ptrVal =
I.getArgOperand(2);
13583 Info.align =
DL.getABITypeAlign(ElTy);
13587 case Intrinsic::aarch64_mops_memset_tag: {
13588 Value *Dst =
I.getArgOperand(0);
13589 Value *Val =
I.getArgOperand(1);
13594 Info.align =
I.getParamAlign(0).valueOrOne();
13625 Base.getOperand(1).hasOneUse() &&
13632 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
13634 if (ShiftAmount ==
Log2_32(LoadBytes))
13647 return NumBits1 > NumBits2;
13654 return NumBits1 > NumBits2;
13661 if (
I->getOpcode() != Instruction::FMul)
13664 if (!
I->hasOneUse())
13669 if (!(
User->getOpcode() == Instruction::FSub ||
13670 User->getOpcode() == Instruction::FAdd))
13691 return NumBits1 == 32 && NumBits2 == 64;
13698 return NumBits1 == 32 && NumBits2 == 64;
13716 bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *
Ext)
const {
13717 if (isa<FPExtInst>(
Ext))
13721 if (
Ext->getType()->isVectorTy())
13724 for (
const Use &U :
Ext->uses()) {
13729 const Instruction *Instr = cast<Instruction>(U.getUser());
13733 case Instruction::Shl:
13734 if (!isa<ConstantInt>(Instr->
getOperand(1)))
13737 case Instruction::GetElementPtr: {
13739 auto &
DL =
Ext->getModule()->getDataLayout();
13740 std::advance(GTI, U.getOperandNo()-1);
13751 if (ShiftAmt == 0 || ShiftAmt > 4)
13755 case Instruction::Trunc:
13758 if (Instr->
getType() ==
Ext->getOperand(0)->getType())
13772 if (
auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
13773 return all_equal(Shuf->getShuffleMask());
13780 bool AllowSplat =
false) {
13781 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
13782 auto *FullTy = FullV->
getType();
13783 auto *HalfTy = HalfV->getType();
13785 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
13788 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
13789 auto *FullVT = cast<FixedVectorType>(FullV->
getType());
13790 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
13791 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
13795 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
13809 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
13810 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
13817 int NumElements = cast<FixedVectorType>(Op1->
getType())->getNumElements() * 2;
13824 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
13825 (M2Start != 0 && M2Start != (NumElements / 2)))
13827 if (S1Op1 && S2Op1 && M1Start != M2Start)
13837 return Ext->getType()->getScalarSizeInBits() ==
13838 2 *
Ext->getOperand(0)->getType()->getScalarSizeInBits();
13843 !areExtDoubled(cast<Instruction>(Ext1)) ||
13844 !areExtDoubled(cast<Instruction>(Ext2)))
13852 Value *VectorOperand =
nullptr;
13857 isa<FixedVectorType>(VectorOperand->
getType()) &&
13858 cast<FixedVectorType>(VectorOperand->
getType())->getNumElements() == 2;
13872 switch (II->getIntrinsicID()) {
13873 case Intrinsic::aarch64_neon_smull:
13874 case Intrinsic::aarch64_neon_umull:
13877 Ops.push_back(&II->getOperandUse(0));
13878 Ops.push_back(&II->getOperandUse(1));
13883 case Intrinsic::fma:
13884 if (isa<VectorType>(
I->getType()) &&
13885 cast<VectorType>(
I->getType())->getElementType()->isHalfTy() &&
13886 !Subtarget->hasFullFP16())
13889 case Intrinsic::aarch64_neon_sqdmull:
13890 case Intrinsic::aarch64_neon_sqdmulh:
13891 case Intrinsic::aarch64_neon_sqrdmulh:
13894 Ops.push_back(&II->getOperandUse(0));
13896 Ops.push_back(&II->getOperandUse(1));
13897 return !Ops.empty();
13898 case Intrinsic::aarch64_sve_ptest_first:
13899 case Intrinsic::aarch64_sve_ptest_last:
13900 if (
auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
13901 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
13902 Ops.push_back(&II->getOperandUse(0));
13903 return !Ops.empty();
13904 case Intrinsic::aarch64_sme_write_horiz:
13905 case Intrinsic::aarch64_sme_write_vert:
13906 case Intrinsic::aarch64_sme_writeq_horiz:
13907 case Intrinsic::aarch64_sme_writeq_vert: {
13908 auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
13911 Ops.push_back(&II->getOperandUse(1));
13914 case Intrinsic::aarch64_sme_read_horiz:
13915 case Intrinsic::aarch64_sme_read_vert:
13916 case Intrinsic::aarch64_sme_readq_horiz:
13917 case Intrinsic::aarch64_sme_readq_vert:
13918 case Intrinsic::aarch64_sme_ld1b_vert:
13919 case Intrinsic::aarch64_sme_ld1h_vert:
13920 case Intrinsic::aarch64_sme_ld1w_vert:
13921 case Intrinsic::aarch64_sme_ld1d_vert:
13922 case Intrinsic::aarch64_sme_ld1q_vert:
13923 case Intrinsic::aarch64_sme_st1b_vert:
13924 case Intrinsic::aarch64_sme_st1h_vert:
13925 case Intrinsic::aarch64_sme_st1w_vert:
13926 case Intrinsic::aarch64_sme_st1d_vert:
13927 case Intrinsic::aarch64_sme_st1q_vert:
13928 case Intrinsic::aarch64_sme_ld1b_horiz:
13929 case Intrinsic::aarch64_sme_ld1h_horiz:
13930 case Intrinsic::aarch64_sme_ld1w_horiz:
13931 case Intrinsic::aarch64_sme_ld1d_horiz:
13932 case Intrinsic::aarch64_sme_ld1q_horiz:
13933 case Intrinsic::aarch64_sme_st1b_horiz:
13934 case Intrinsic::aarch64_sme_st1h_horiz:
13935 case Intrinsic::aarch64_sme_st1w_horiz:
13936 case Intrinsic::aarch64_sme_st1d_horiz:
13937 case Intrinsic::aarch64_sme_st1q_horiz: {
13938 auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
13941 Ops.push_back(&II->getOperandUse(3));
13944 case Intrinsic::aarch64_neon_pmull:
13947 Ops.push_back(&II->getOperandUse(0));
13948 Ops.push_back(&II->getOperandUse(1));
13950 case Intrinsic::aarch64_neon_pmull64:
13952 II->getArgOperand(1)))
13954 Ops.push_back(&II->getArgOperandUse(0));
13955 Ops.push_back(&II->getArgOperandUse(1));
13962 if (!
I->getType()->isVectorTy())
13965 switch (
I->getOpcode()) {
13966 case Instruction::Sub:
13973 auto Ext1 = cast<Instruction>(
I->getOperand(0));
13974 auto Ext2 = cast<Instruction>(
I->getOperand(1));
13976 Ops.push_back(&Ext1->getOperandUse(0));
13977 Ops.push_back(&Ext2->getOperandUse(0));
13980 Ops.push_back(&
I->getOperandUse(0));
13981 Ops.push_back(&
I->getOperandUse(1));
13986 int NumZExts = 0, NumSExts = 0;
13987 for (
auto &
Op :
I->operands()) {
13989 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
14007 Ops.push_back(&
Shuffle->getOperandUse(0));
14008 Ops.push_back(&
Op);
14029 dyn_cast<ConstantInt>(
Insert->getOperand(2));
14031 if (!ElementConstant || ElementConstant->
getZExtValue() != 0)
14034 unsigned Opcode = OperandInstr->
getOpcode();
14035 if (Opcode == Instruction::SExt)
14037 else if (Opcode == Instruction::ZExt)
14042 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
14044 const DataLayout &
DL =
I->getFunction()->getParent()->getDataLayout();
14050 Ops.push_back(&
Shuffle->getOperandUse(0));
14051 Ops.push_back(&
Op);
14055 return !Ops.empty() && (NumSExts == 2 || NumZExts == 2);
14065 auto *SrcTy = cast<FixedVectorType>(
Op->getType());
14066 auto *DstTy = cast<FixedVectorType>(ZExt->
getType());
14067 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
14068 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
14069 assert(DstWidth % SrcWidth == 0 &&
14070 "TBL lowering is not supported for a ZExt instruction with this "
14071 "source & destination element type.");
14072 unsigned ZExtFactor = DstWidth / SrcWidth;
14073 unsigned NumElts = SrcTy->getNumElements();
14079 for (
unsigned i = 0;
i < NumElts * ZExtFactor;
i++) {
14080 if (IsLittleEndian) {
14081 if (
i % ZExtFactor == 0)
14082 Mask.push_back(
i / ZExtFactor);
14084 Mask.push_back(NumElts);
14086 if ((
i + 1) % ZExtFactor == 0)
14087 Mask.push_back((
i - ZExtFactor + 1) / ZExtFactor);
14089 Mask.push_back(NumElts);
14093 auto *FirstEltZero =
Builder.CreateInsertElement(
14096 Result =
Builder.CreateBitCast(Result, DstTy);
14104 int NumElements = cast<FixedVectorType>(TI->
getType())->getNumElements();
14106 auto *DstTy = cast<FixedVectorType>(TI->
getType());
14107 assert(SrcTy->getElementType()->isIntegerTy() &&
14108 "Non-integer type source vector element is not supported");
14109 assert(DstTy->getElementType()->isIntegerTy(8) &&
14110 "Unsupported destination vector element type");
14111 unsigned SrcElemTySz =
14112 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
14113 unsigned DstElemTySz =
14114 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
14115 assert((SrcElemTySz % DstElemTySz == 0) &&
14116 "Cannot lower truncate to tbl instructions for a source element size "
14117 "that is not divisible by the destination element size");
14118 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
14119 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
14120 "Unsupported source vector element type size");
14128 for (
int Itr = 0; Itr < 16; Itr++) {
14129 if (Itr < NumElements)
14130 MaskConst.push_back(
Builder.getInt8(
14131 IsLittleEndian ? Itr * TruncFactor
14132 : Itr * TruncFactor + (TruncFactor - 1)));
14134 MaskConst.push_back(
Builder.getInt8(255));
14137 int MaxTblSz = 128 * 4;
14138 int MaxSrcSz = SrcElemTySz * NumElements;
14140 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
14141 assert(ElemsPerTbl <= 16 &&
14142 "Maximum elements selected using TBL instruction cannot exceed 16!");
14144 int ShuffleCount = 128 / SrcElemTySz;
14146 for (
int i = 0;
i < ShuffleCount; ++
i)
14147 ShuffleLanes.push_back(
i);
14153 while (ShuffleLanes.back() < NumElements) {
14154 Parts.push_back(
Builder.CreateBitCast(
14157 if (Parts.size() == 4) {
14159 Intrinsic::aarch64_neon_tbl4, VecTy);
14165 for (
int i = 0;
i < ShuffleCount; ++
i)
14166 ShuffleLanes[
i] += ShuffleCount;
14170 "Lowering trunc for vectors requiring different TBL instructions is "
14174 if (!Parts.empty()) {
14176 switch (Parts.size()) {
14178 TblID = Intrinsic::aarch64_neon_tbl1;
14181 TblID = Intrinsic::aarch64_neon_tbl2;
14184 TblID = Intrinsic::aarch64_neon_tbl3;
14196 "more than 2 tbl instructions!");
14199 if (ElemsPerTbl < 16) {
14201 std::iota(FinalMask.begin(), FinalMask.end(), 0);
14202 FinalResult =
Builder.CreateShuffleVector(
Results[0], FinalMask);
14206 if (ElemsPerTbl < 16) {
14207 std::iota(FinalMask.begin(), FinalMask.begin() + ElemsPerTbl, 0);
14208 std::iota(FinalMask.begin() + ElemsPerTbl, FinalMask.end(), 16);
14210 std::iota(FinalMask.begin(), FinalMask.end(), 0);
14232 if (!L || L->
getHeader() !=
I->getParent() ||
F->hasMinSize() ||
14236 auto *SrcTy = dyn_cast<FixedVectorType>(
I->getOperand(0)->getType());
14237 auto *DstTy = dyn_cast<FixedVectorType>(
I->getType());
14238 if (!SrcTy || !DstTy)
14244 auto *ZExt = dyn_cast<ZExtInst>(
I);
14245 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
14246 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
14247 if (DstWidth % 8 == 0 && DstWidth > 16 && DstWidth < 64) {
14253 auto *UIToFP = dyn_cast<UIToFPInst>(
I);
14254 if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
14255 DstTy->getElementType()->isFloatTy()) {
14257 auto *ZExt = cast<ZExtInst>(
14259 auto *UI =
Builder.CreateUIToFP(ZExt, DstTy);
14260 I->replaceAllUsesWith(UI);
14261 I->eraseFromParent();
14268 auto *FPToUI = dyn_cast<FPToUIInst>(
I);
14270 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
14271 SrcTy->getElementType()->isFloatTy() &&
14272 DstTy->getElementType()->isIntegerTy(8)) {
14274 auto *WideConv =
Builder.CreateFPToUI(FPToUI->getOperand(0),
14276 auto *TruncI =
Builder.CreateTrunc(WideConv, DstTy);
14277 I->replaceAllUsesWith(TruncI);
14278 I->eraseFromParent();
14287 auto *TI = dyn_cast<TruncInst>(
I);
14288 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
14289 ((SrcTy->getElementType()->isIntegerTy(32) ||
14290 SrcTy->getElementType()->isIntegerTy(64)) &&
14291 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
14300 Align &RequiredAligment)
const {
14305 RequiredAligment =
Align(1);
14307 return NumBits == 32 || NumBits == 64;
14317 return std::max<unsigned>(1, (
DL.getTypeSizeInBits(VecTy) + 127) /
VecSize);
14331 unsigned VecSize =
DL.getTypeSizeInBits(VecTy);
14333 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
14335 UseScalable =
false;
14342 if (NumElements < 2)
14346 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
14352 (VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
14354 UseScalable =
true;
14406 "Invalid interleave factor");
14407 assert(!Shuffles.
empty() &&
"Empty shufflevector input");
14409 "Unmatched number of shufflevectors and indices");
14419 if (!Subtarget->hasNEON() ||
14425 auto *FVTy = cast<FixedVectorType>(VTy);
14429 Type *EltTy = FVTy->getElementType();
14437 FVTy->getNumElements() / NumLoads);
14447 if (NumLoads > 1) {
14451 BaseAddr =
Builder.CreateBitCast(
14461 LDVTy->getElementCount());
14464 Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
14465 Intrinsic::aarch64_sve_ld4_sret};
14466 static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
14467 Intrinsic::aarch64_neon_ld3,
14468 Intrinsic::aarch64_neon_ld4};
14472 SVELoadIntrs[Factor - 2], {LDVTy});
14475 LI->
getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
14482 Value *PTrue =
nullptr;
14484 std::optional<unsigned> PgPattern =
14493 PTrue =
Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
14497 for (
unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
14502 BaseAddr =
Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
14503 FVTy->getNumElements() * Factor);
14508 LdNFunc, {PTrue,
Builder.CreateBitCast(BaseAddr, PtrTy)},
"ldN");
14510 LdN =
Builder.CreateCall(LdNFunc,
Builder.CreateBitCast(BaseAddr, PtrTy),
14514 for (
unsigned i = 0;
i < Shuffles.
size();
i++) {
14516 unsigned Index = Indices[
i];
14521 SubVec =
Builder.CreateExtractVector(
14527 SubVec =
Builder.CreateIntToPtr(
14529 FVTy->getNumElements()));
14531 SubVecs[SVI].push_back(SubVec);
14540 auto &SubVec = SubVecs[SVI];
14543 SVI->replaceAllUsesWith(WideVec);
14577 unsigned Factor)
const {
14584 "Invalid interleave factor");
14586 auto *VecTy = cast<FixedVectorType>(SVI->
getType());
14587 assert(VecTy->getNumElements() % Factor == 0 &&
"Invalid interleaved store");
14589 unsigned LaneLen = VecTy->getNumElements() / Factor;
14590 Type *EltTy = VecTy->getElementType();
14599 if (!Subtarget->hasNEON() ||
14612 Type *IntTy =
DL.getIntPtrType(EltTy);
14613 unsigned NumOpElts =
14614 cast<FixedVectorType>(Op0->
getType())->getNumElements();
14618 Op0 =
Builder.CreatePtrToInt(Op0, IntVecTy);
14619 Op1 =
Builder.CreatePtrToInt(Op1, IntVecTy);
14626 LaneLen /= NumStores;
14633 Value *BaseAddr =
SI->getPointerOperand();
14635 if (NumStores > 1) {
14639 BaseAddr =
Builder.CreateBitCast(
14641 SubVecTy->getElementType()->getPointerTo(
SI->getPointerAddressSpace()));
14654 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
Mask[0] != 0)
14659 ? STVTy->getElementType()->getPointerTo(
SI->getPointerAddressSpace())
14660 : STVTy->getPointerTo(
SI->getPointerAddressSpace());
14662 STVTy->getElementCount());
14664 static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
14665 Intrinsic::aarch64_sve_st3,
14666 Intrinsic::aarch64_sve_st4};
14667 static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
14668 Intrinsic::aarch64_neon_st3,
14669 Intrinsic::aarch64_neon_st4};
14673 SVEStoreIntrs[Factor - 2], {STVTy});
14676 SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
14678 Value *PTrue =
nullptr;
14680 std::optional<unsigned> PgPattern =
14685 DL.getTypeSizeInBits(SubVecTy))
14690 PTrue =
Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
14694 for (
unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
14699 for (
unsigned i = 0;
i < Factor;
i++) {
14701 unsigned IdxI = StoreCount * LaneLen * Factor +
i;
14702 if (
Mask[IdxI] >= 0) {
14706 unsigned StartMask = 0;
14707 for (
unsigned j = 1;
j < LaneLen;
j++) {
14708 unsigned IdxJ = StoreCount * LaneLen * Factor +
j * Factor +
i;
14709 if (
Mask[IdxJ] >= 0) {
14710 StartMask =
Mask[IdxJ] -
j;
14732 Ops.push_back(PTrue);
14736 if (StoreCount > 0)
14737 BaseAddr =
Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
14738 BaseAddr, LaneLen * Factor);
14740 Ops.push_back(
Builder.CreateBitCast(BaseAddr, PtrTy));
14741 Builder.CreateCall(StNFunc, Ops);
14748 bool CanImplicitFloat = !FuncAttributes.
hasFnAttr(Attribute::NoImplicitFloat);
14749 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
14750 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
14754 bool IsSmallMemset =
Op.isMemset() &&
Op.size() < 32;
14755 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
14756 if (
Op.isAligned(AlignCheck))
14764 if (CanUseNEON &&
Op.isMemset() && !IsSmallMemset &&
14767 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(
MVT::f128,
Align(16)))
14778 bool CanImplicitFloat = !FuncAttributes.
hasFnAttr(Attribute::NoImplicitFloat);
14779 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
14780 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
14784 bool IsSmallMemset =
Op.isMemset() &&
Op.size() < 32;
14785 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
14786 if (
Op.isAligned(AlignCheck))
14794 if (CanUseNEON &&
Op.isMemset() && !IsSmallMemset &&
14797 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(
MVT::f128,
Align(16)))
14810 <<
": avoid UB for INT64_MIN\n");
14815 bool IsLegal = ((Immed >> 12) == 0 ||
14816 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
14818 <<
" legal add imm: " << (IsLegal ?
"yes" :
"no") <<
"\n");
14844 if (
Insn.size() > 1)
14878 if (isa<ScalableVectorType>(Ty)) {
14880 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
14889 uint64_t NumBits =
DL.getTypeSizeInBits(Ty);
14890 NumBytes = NumBits / 8;
14904 if (NumBytes &&
Offset > 0 && (
Offset / NumBytes) <= (1LL << 12) - 1 &&
14930 return Subtarget->hasFullFP16();
14963 static const MCPhysReg ScratchRegs[] = {
14964 AArch64::X16, AArch64::X17, AArch64::LR, 0
14966 return ScratchRegs;
14970 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
14979 "Expected shift op");
14981 SDValue ShiftLHS =
N->getOperand(0);
14982 EVT VT =
N->getValueType(0);
14988 isa<ConstantSDNode>(ShiftLHS.
getOperand(1))) {
14993 if (
auto *SRLC = dyn_cast<ConstantSDNode>(AndLHS.
getOperand(1))) {
14995 if (
auto *SHLC = dyn_cast<ConstantSDNode>(
N->getOperand(1)))
14996 return SRLC->getZExtValue() == SHLC->getZExtValue();
15008 (
N->getOperand(0).getOpcode() ==
ISD::SHL ||
15009 N->getOperand(0).getOpcode() ==
ISD::SRL) &&
15010 "Expected XOR(SHIFT) pattern");
15013 auto *XorC = dyn_cast<ConstantSDNode>(
N->getOperand(1));
15014 auto *ShiftC = dyn_cast<ConstantSDNode>(
N->getOperand(0).getOperand(1));
15015 if (XorC && ShiftC) {
15016 unsigned MaskIdx, MaskLen;
15017 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
15018 unsigned ShiftAmt = ShiftC->getZExtValue();
15019 unsigned BitWidth =
N->getValueType(0).getScalarSizeInBits();
15020 if (
N->getOperand(0).getOpcode() ==
ISD::SHL)
15021 return MaskIdx == ShiftAmt && MaskLen == (
BitWidth - ShiftAmt);
15022 return MaskIdx == 0 && MaskLen == (
BitWidth - ShiftAmt);
15032 N->getOperand(0).getOpcode() ==
ISD::SRL) ||
15034 N->getOperand(0).getOpcode() ==
ISD::SHL)) &&
15035 "Expected shift-shift mask");
15037 if (!
N->getOperand(0)->hasOneUse())
15041 EVT VT =
N->getValueType(0);
15043 auto *
C1 = dyn_cast<ConstantSDNode>(
N->getOperand(0).getOperand(1));
15044 auto *C2 = dyn_cast<ConstantSDNode>(
N->getOperand(1));
15045 return (!
C1 || !C2 ||
C1->getZExtValue() >= C2->getZExtValue());
15059 int64_t Val =
Imm.getSExtValue();
15063 if ((int64_t)Val < 0)
15066 Val &= (1LL << 32) - 1;
15074 unsigned Index)
const {
15087 EVT VT =
N->getValueType(0);
15088 if (!Subtarget->hasNEON() || !VT.
isVector())
15100 auto *ShiftAmt = dyn_cast<ConstantSDNode>(
Shift.getOperand(1));
15101 EVT ShiftEltTy =
Shift.getValueType().getVectorElementType();
15102 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.
getSizeInBits() - 1)
15129 SDValue VecReduceOp0 =
N->getOperand(0);
15130 unsigned Opcode = VecReduceOp0.
getOpcode();
15137 if (
ABS->getOperand(0)->getOpcode() !=
ISD::SUB ||
15142 unsigned Opcode0 =
SUB->getOperand(0).getOpcode();
15143 unsigned Opcode1 =
SUB->getOperand(1).getOpcode();
15150 bool IsZExt =
false;
15176 UABDHigh8Op0, UABDHigh8Op1);
15187 UABDLo8Op0, UABDLo8Op1);
15206 if (!
ST->hasDotProd())
15220 if (A.getOpcode() !=
B.getOpcode() ||
15221 A.getOperand(0).getValueType() !=
B.getOperand(0).getValueType())
15223 ExtOpcode = A.getOpcode();
15228 EVT Op0VT = A.getOperand(0).getValueType();
15231 if (!IsValidElementCount || !IsValidSize)
15240 B =
B.getOperand(0);
15243 unsigned NumOfVecReduce;
15245 if (IsMultipleOf16) {
15255 if (NumOfVecReduce == 1) {
15258 A.getOperand(0),
B);
15265 for (;
I < VecReduce16Num;
I += 1) {
15274 SDotVec16.push_back(Dot);
15284 if (VecReduce8Num == 0)
15285 return VecReduceAdd16;
15308 auto DetectAddExtract = [&](
SDValue A) {
15313 EVT VT = A.getValueType();
15314 SDValue Op0 = A.getOperand(0);
15315 SDValue Op1 = A.getOperand(1);
15341 if (
SDValue R = DetectAddExtract(A))
15344 if (A.getOperand(0).getOpcode() ==
ISD::ADD && A.getOperand(0).hasOneUse())
15348 if (A.getOperand(1).getOpcode() ==
ISD::ADD && A.getOperand(1).hasOneUse())
15373 AArch64TargetLowering::BuildSDIVPow2(
SDNode *
N,
const APInt &Divisor,
15380 EVT VT =
N->getValueType(0);
15404 Created.push_back(
Cmp.getNode());
15405 Created.push_back(
Add.getNode());
15406 Created.push_back(CSel.
getNode());
15417 Created.push_back(
SRA.getNode());
15422 AArch64TargetLowering::BuildSREMPow2(
SDNode *
N,
const APInt &Divisor,
15429 EVT VT =
N->getValueType(0);
15455 Created.push_back(
Cmp.getNode());
15456 Created.push_back(
And.getNode());
15467 Created.push_back(Negs.
getNode());
15468 Created.push_back(AndPos.
getNode());
15469 Created.push_back(AndNeg.
getNode());
15479 case Intrinsic::aarch64_sve_cntb:
15481 case Intrinsic::aarch64_sve_cnth:
15483 case Intrinsic::aarch64_sve_cntw:
15485 case Intrinsic::aarch64_sve_cntd:
15513 return TypeNode->
getVT();
15523 if (
Mask == UCHAR_MAX)
15525 else if (
Mask == USHRT_MAX)
15527 else if (
Mask == UINT_MAX)
15549 unsigned ExtendOpcode = Extend.
getOpcode();
15572 unsigned Opc =
Op.getOpcode();
15583 EVT PreExtendLegalType =
15587 NewOps.push_back(
Op.isUndef() ? DAG.
getUNDEF(PreExtendLegalType)
15589 PreExtendLegalType));
15597 cast<ShuffleVectorSDNode>(BV)->getMask());
15606 EVT VT =
Mul->getValueType(0);
15625 EVT VT =
N->getValueType(0);
15629 if (
N->getOperand(0).getOpcode() !=
ISD::AND ||
15630 N->getOperand(0).getOperand(0).getOpcode() !=
ISD::SRL)
15643 if (!V1.
isMask(HalfSize) ||
V2 != (1ULL | 1ULL << HalfSize) ||
15644 V3 != (HalfSize - 1))
15673 EVT VT =
N->getValueType(0);
15677 unsigned AddSubOpc;
15679 auto IsAddSubWith1 = [&](
SDValue V) ->
bool {
15686 if (
auto C = dyn_cast<ConstantSDNode>(Opnd))
15692 if (IsAddSubWith1(N0)) {
15694 return DAG.
getNode(AddSubOpc,
DL, VT, N1, MulVal);
15697 if (IsAddSubWith1(N1)) {
15699 return DAG.
getNode(AddSubOpc,
DL, VT, N0, MulVal);
15703 if (!isa<ConstantSDNode>(N1))
15707 const APInt &ConstValue =
C->getAPIntValue();
15714 if (ConstValue.
sge(1) && ConstValue.
sle(16))
15730 if (TrailingZeroes) {
15738 if (
N->hasOneUse() && (
N->use_begin()->getOpcode() ==
ISD::ADD ||
15739 N->use_begin()->getOpcode() ==
ISD::SUB))
15744 APInt ShiftedConstValue = ConstValue.
ashr(TrailingZeroes);
15747 auto Shl = [&](
SDValue N0,
unsigned N1) {
15786 APInt SCVMinus1 = ShiftedConstValue - 1;
15787 APInt SCVPlus1 = ShiftedConstValue + 1;
15788 APInt CVPlus1 = ConstValue + 1;
15792 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
15795 return Sub(Shl(N0, ShiftAmt), N0);
15797 ShiftAmt = SCVPlus1.
logBase2() + TrailingZeroes;
15798 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
15799 }
else if (Subtarget->hasLSLFast() &&
15800 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
15801 APInt CVMMinus1 = CVM - 1;
15802 APInt CVNMinus1 = CVN - 1;
15803 unsigned ShiftM1 = CVMMinus1.
logBase2();
15804 unsigned ShiftN1 = CVNMinus1.
logBase2();
15806 if (ShiftM1 <= 3 && ShiftN1 <= 3) {
15807 SDValue MVal = Add(Shl(N0, ShiftM1), N0);
15808 return Add(Shl(MVal, ShiftN1), MVal);
15815 APInt SCVPlus1 = -ShiftedConstValue + 1;
15816 APInt CVNegPlus1 = -ConstValue + 1;
15817 APInt CVNegMinus1 = -ConstValue - 1;
15820 return Sub(N0, Shl(N0, ShiftAmt));
15822 ShiftAmt = CVNegMinus1.
logBase2();
15823 return Negate(Add(Shl(N0, ShiftAmt), N0));
15825 ShiftAmt = SCVPlus1.
logBase2() + TrailingZeroes;
15826 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
15846 EVT VT =
N->getValueType(0);
15848 N->getOperand(0)->getOperand(0)->getOpcode() !=
ISD::SETCC ||
15849 VT.
getSizeInBits() !=
N->getOperand(0)->getValueType(0).getSizeInBits())
15857 dyn_cast<BuildVectorSDNode>(
N->getOperand(0)->getOperand(1))) {
15859 if (!BV->isConstant())
15864 EVT IntVT = BV->getValueType(0);
15871 N->getOperand(0)->getOperand(0), MaskConst);
15886 EVT VT =
N->getValueType(0);
15891 if (VT.
getSizeInBits() !=
N->getOperand(0).getValueSizeInBits())
15900 !cast<LoadSDNode>(N0)->isVolatile()) {
15926 if (!
N->getValueType(0).isSimple())
15930 if (!
Op.getValueType().isSimple() ||
Op.getOpcode() !=
ISD::FMUL)
15933 if (!
Op.getValueType().is64BitVector() && !
Op.getValueType().is128BitVector())
15937 if (!isa<BuildVectorSDNode>(ConstVec))
15940 MVT FloatTy =
Op.getSimpleValueType().getVectorElementType();
15942 if (FloatBits != 32 && FloatBits != 64 &&
15943 (FloatBits != 16 || !Subtarget->hasFullFP16()))
15946 MVT IntTy =
N->getSimpleValueType(0).getVectorElementType();
15948 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
15952 if (IntBits > FloatBits)
15957 int32_t
Bits = IntBits == 64 ? 64 : 32;
15959 if (
C == -1 ||
C == 0 ||
C >
Bits)
15962 EVT ResTy =
Op.getValueType().changeVectorElementTypeToInteger();
15968 EVT SatVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
15976 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
15977 : Intrinsic::aarch64_neon_vcvtfp2fxu;
15983 if (IntBits < FloatBits)
15994 if (!Subtarget->hasNEON())
15998 unsigned Opc =
Op->getOpcode();
15999 if (!
Op.getValueType().isVector() || !
Op.getValueType().isSimple() ||
16000 !
Op.getOperand(0).getValueType().isSimple() ||
16004 SDValue ConstVec =
N->getOperand(1);
16005 if (!isa<BuildVectorSDNode>(ConstVec))
16008 MVT IntTy =
Op.getOperand(0).getSimpleValueType().getVectorElementType();
16010 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
16013 MVT FloatTy =
N->getSimpleValueType(0).getVectorElementType();
16015 if (FloatBits != 32 && FloatBits != 64)
16019 if (IntBits > FloatBits)
16025 if (
C == -1 ||
C == 0 ||
C > FloatBits)
16029 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
16030 switch (NumLanes) {
16047 if (IntBits < FloatBits)
16051 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
16052 : Intrinsic::aarch64_neon_vcvtfxu2fp;
16069 if (!isa<ConstantSDNode>(
N.getOperand(1)))
16072 ShiftAmount =
N->getConstantOperandVal(1);
16073 Src =
N->getOperand(0);
16086 EVT VT =
N->getValueType(0);
16095 bool LHSFromHi =
false;
16101 bool RHSFromHi =
false;
16107 if (LHSFromHi == RHSFromHi)
16124 EVT VT =
N->getValueType(0);
16151 for (
int i = 1;
i >= 0; --
i) {
16152 for (
int j = 1;
j >= 0; --
j) {
16155 SDValue Sub, Add, SubSibling, AddSibling;
16190 for (
int i = 1;
i >= 0; --
i)
16191 for (
int j = 1;
j >= 0; --
j) {
16194 if (!BVN0 || !BVN1)
16197 bool FoundMatch =
true;
16201 if (!CN0 || !CN1 ||
16203 FoundMatch =
false;
16227 EVT VT =
N->getValueType(0);
16275 auto *Op1 = dyn_cast<ConstantSDNode>(Cmp1.
getOperand(1));
16276 if (Op1 && Op1->getAPIntValue().isNegative() &&
16277 Op1->getAPIntValue().sgt(-32)) {
16284 NZCVOp, Condition, Cmp0);
16287 Cmp1.
getOperand(1), NZCVOp, Condition, Cmp0);
16298 EVT VT =
N->getValueType(0);
16323 MaskForTy = 0xffull;
16326 MaskForTy = 0xffffull;
16329 MaskForTy = 0xffffffffull;
16337 if (
auto *Op0 = dyn_cast<ConstantSDNode>(
N->getOperand(0)))
16338 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
16346 N =
N.getOperand(0);
16352 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
16356 N =
N.getOperand(0);
16359 if (
N.getValueType().getVectorMinNumElements() < NumElts)
16371 return N.getValueType().getVectorMinNumElements() >= NumElts;
16378 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
16379 if (MaxSVESize && MinSVESize == MaxSVESize) {
16381 unsigned PatNumElts =
16383 return PatNumElts == (NumElts * VScale);
16395 Op =
Op->getOperand(0);
16408 unsigned Opc = Src->getOpcode();
16412 SDValue UnpkOp = Src->getOperand(0);
16428 if ((ExtVal == 0xFF && EltTy ==
MVT::i8) ||
16429 (ExtVal == 0xFFFF && EltTy ==
MVT::i16) ||
16430 (ExtVal == 0xFFFFFFFF && EltTy ==
MVT::i32))
16450 return N->getOperand(1);
16452 return N->getOperand(0);
16459 if (!Src.hasOneUse())
16470 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
16487 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
16504 EVT VT =
N->getValueType(0);
16533 DefBits = ~DefBits;
16540 UndefBits = ~UndefBits;
16542 UndefBits, &
LHS)) ||
16569 (
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
16570 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
16571 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
16572 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
16573 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
16574 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
16575 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
16576 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
16578 N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
16641 if (
VS.getConstantOperandVal(0) != NumEls)
16660 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
16663 EVT VT =
N->getValueType(0);
16692 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
16711 {N0->getOperand(0), Extract1, Extract2});
16725 EVT VT =
N->getValueType(0);
16726 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
16754 for (
size_t i = 0;
i <
Mask.size(); ++
i)
16764 if (
N->getOperand(0).getValueType() ==
MVT::v4i8) {
16768 if (
N->getNumOperands() % 2 == 0 &&
all_of(
N->op_values(), [](
SDValue V) {
16769 if (V.getValueType() != MVT::v4i8)
16773 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
16774 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
16775 LD->getExtensionType() == ISD::NON_EXTLOAD;
16781 for (
unsigned i = 0;
i <
N->getNumOperands();
i++) {
16789 LD->getMemOperand());
16791 Ops.push_back(NewLoad);
16809 N->isOnlyUserOf(N1.getNode())) {
16810 auto isBitwiseVectorNegate = [](
SDValue V) {
16845 if (
N->getNumOperands() == 2 && N0Opc == N1Opc &&
16865 if (N00Source == N10Source && N01Source == N11Source &&
16874 if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
16876 return DAG.
getNode(N0Opc, dl, VT, N00Source, N01Source);
16902 MVT RHSTy =
RHS.getValueType().getSimpleVT();
16908 dbgs() <<
"aarch64-lower: concat_vectors bitcast simplification\n");
16924 EVT VT =
N->getValueType(0);
16946 SDValue SubVec =
N->getOperand(1);
16947 uint64_t IdxVal =
N->getConstantOperandVal(2);
16958 if (IdxVal == 0 && Vec.
isUndef())
16964 (IdxVal != 0 && IdxVal != NumSubElts))
17009 EVT ResTy =
N->getValueType(0);
17047 MVT VT =
N.getSimpleValueType();
17049 N.getConstantOperandVal(1) == 0)
17050 N =
N.getOperand(0);
17052 switch (
N.getOpcode()) {
17077 if (
N.getValueType().is64BitVector()) {
17089 N =
N.getOperand(0);
17092 if (
N.getOperand(0).getValueType().isScalableVector())
17094 return cast<ConstantSDNode>(
N.getOperand(1))->getAPIntValue() ==
17095 N.getOperand(0).getValueType().getVectorNumElements() / 2;
17151 cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue());
17160 if (!TValue || !FValue)
17164 if (!TValue->
isOne()) {
17228 EVT VT =
Op->getValueType(0);
17235 EVT VT =
N->getValueType(0);
17246 auto *LHSN1 = dyn_cast<ConstantSDNode>(
LHS->getOperand(1));
17247 auto *RHSN1 = dyn_cast<ConstantSDNode>(
RHS->getOperand(1));
17248 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
17274 EVT VT =
N->getValueType(0);
17301 if (!CTVal || !CFVal)
17336 "Unexpected constant value");
17347 EVT VT =
N->getValueType(0);
17354 auto isZeroDot = [](
SDValue Dot) {
17359 if (!isZeroDot(Dot))
17361 if (!isZeroDot(Dot))
17374 EVT VT =
Op.getValueType();
17428 MVT VT =
N->getSimpleValueType(0);
17440 LHS.getOpcode() !=
RHS.getOpcode())
17443 unsigned ExtType =
LHS.getOpcode();
17449 if (!
RHS.getNode())
17455 if (!
LHS.getNode())
17466 !
Op.getNode()->hasAnyUseOfValue(0);
17473 return std::nullopt;
17476 return std::nullopt;
17484 return std::nullopt;
17508 Op->getOperand(0),
Op->getOperand(1),
17521 EVT VT =
N->getValueType(0);
17534 EVT VT =
N->getValueType(0);
17540 N->getOperand(0).getOperand(0).getValueType() !=
17541 N->getOperand(1).getOperand(0).getValueType())
17544 SDValue N0 =
N->getOperand(0).getOperand(0);
17545 SDValue N1 =
N->getOperand(1).getOperand(0);
17568 EVT VT =
N->getValueType(0);
17582 SDValue Elt0 =
N->getOperand(0), Elt1 =
N->getOperand(1);
17588 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
17613 EVT VT =
N->getValueType(0);
17629 unsigned Opcode =
N.getOpcode();
17634 SrcVT = cast<VTSDNode>(
N.getOperand(1))->getVT();
17636 SrcVT =
N.getOperand(0).getValueType();
17644 return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
17646 return isa<ConstantSDNode>(
N.getOperand(1));
17656 auto IsOneUseExtend = [](
SDValue N) {
17664 if (isa<ConstantSDNode>(
Z) || IsOneUseExtend(
Z))
17671 if (!IsOneUseExtend(
Shift))
17675 EVT VT =
N->getValueType(0);
17691 EVT VT =
N->getValueType(0);
17732 if (!Add.hasOneUse())
17738 SDValue M2 = Add.getOperand(1);
17746 EVT VT =
N->getValueType(0);
17788 assert(
LHS.getValueType().is64BitVector() &&
17789 RHS.getValueType().is64BitVector() &&
17790 "unexpected shape for long operation");
17797 if (!
RHS.getNode())
17801 if (!
LHS.getNode())
17813 MVT ElemTy =
N->getSimpleValueType(0).getScalarType();
17816 int64_t ShiftAmount;
17818 APInt SplatValue, SplatUndef;
17819 unsigned SplatBitSize;
17822 HasAnyUndefs, ElemBits) ||
17823 SplatBitSize != ElemBits)
17827 }
else if (
ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(
N->getOperand(2))) {
17828 ShiftAmount = CVN->getSExtValue();
17837 case Intrinsic::aarch64_neon_sqshl:
17839 IsRightShift =
false;
17841 case Intrinsic::aarch64_neon_uqshl:
17843 IsRightShift =
false;
17845 case Intrinsic::aarch64_neon_srshl:
17847 IsRightShift =
true;
17849 case Intrinsic::aarch64_neon_urshl:
17851 IsRightShift =
true;
17853 case Intrinsic::aarch64_neon_sqshlu:
17855 IsRightShift =
false;
17857 case Intrinsic::aarch64_neon_sshl:
17858 case Intrinsic::aarch64_neon_ushl:
17863 IsRightShift =
false;
17867 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(
int)ElemBits) {
17869 return DAG.
getNode(Opcode, dl,
N->getValueType(0),
N->getOperand(1),
17871 }
else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
17873 return DAG.
getNode(Opcode, dl,
N->getValueType(0),
N->getOperand(1),
17893 N->getOperand(0),
N->getOperand(1), AndN.
getOperand(0));
17901 N->getOperand(1).getSimpleValueType(),
17924 SDValue Scalar =
N->getOperand(3);
17925 EVT ScalarTy = Scalar.getValueType();
17930 SDValue Passthru =
N->getOperand(1);
17933 Pred, Scalar, Passthru);
17939 EVT VT =
N->getValueType(0);
17968 SDValue Comparator =
N->getOperand(3);
17972 EVT VT =
N->getValueType(0);
17973 EVT CmpVT =
N->getOperand(2).getValueType();
17984 case Intrinsic::aarch64_sve_cmpeq_wide:
17985 case Intrinsic::aarch64_sve_cmpne_wide:
17986 case Intrinsic::aarch64_sve_cmpge_wide:
17987 case Intrinsic::aarch64_sve_cmpgt_wide:
17988 case Intrinsic::aarch64_sve_cmplt_wide:
17989 case Intrinsic::aarch64_sve_cmple_wide: {
17990 if (
auto *CN = dyn_cast<ConstantSDNode>(Comparator.
getOperand(0))) {
17991 int64_t ImmVal = CN->getSExtValue();
17992 if (ImmVal >= -16 && ImmVal <= 15)
18000 case Intrinsic::aarch64_sve_cmphs_wide:
18001 case Intrinsic::aarch64_sve_cmphi_wide:
18002 case Intrinsic::aarch64_sve_cmplo_wide:
18003 case Intrinsic::aarch64_sve_cmpls_wide: {
18004 if (
auto *CN = dyn_cast<ConstantSDNode>(Comparator.
getOperand(0))) {
18031 assert(
Op.getValueType().isScalableVector() &&
18033 "Expected legal scalable vector type!");
18035 "Expected same type for PTEST operands");
18069 SDValue VecToReduce =
N->getOperand(2);
18088 SDValue VecToReduce =
N->getOperand(2);
18105 SDValue InitVal =
N->getOperand(2);
18106 SDValue VecToReduce =
N->getOperand(3);
18113 DAG.
getUNDEF(ReduceVT), InitVal, Zero);
18115 SDValue Reduce = DAG.
getNode(Opc,
DL, ReduceVT, Pred, InitVal, VecToReduce);
18128 bool SwapOperands =
false) {
18130 assert(
N->getNumOperands() == 4 &&
"Expected 3 operand intrinsic!");
18132 SDValue Op1 =
N->getOperand(SwapOperands ? 3 : 2);
18133 SDValue Op2 =
N->getOperand(SwapOperands ? 2 : 3);
18138 return DAG.
getNode(Opc,
SDLoc(
N),
N->getValueType(0), Op1, Op2);
18140 return DAG.
getNode(Opc,
SDLoc(
N),
N->getValueType(0), Pg, Op1, Op2);
18155 case Intrinsic::get_active_lane_mask: {
18157 EVT VT =
N->getValueType(0);
18180 N->getOperand(1),
N->getOperand(2));
18188 case Intrinsic::aarch64_neon_vcvtfxs2fp:
18189 case Intrinsic::aarch64_neon_vcvtfxu2fp:
18191 case Intrinsic::aarch64_neon_saddv:
18193 case Intrinsic::aarch64_neon_uaddv:
18195 case Intrinsic::aarch64_neon_sminv:
18197 case Intrinsic::aarch64_neon_uminv:
18199 case Intrinsic::aarch64_neon_smaxv:
18201 case Intrinsic::aarch64_neon_umaxv:
18203 case Intrinsic::aarch64_neon_fmax:
18205 N->getOperand(1),
N->getOperand(2));
18206 case Intrinsic::aarch64_neon_fmin:
18208 N->getOperand(1),
N->getOperand(2));
18209 case Intrinsic::aarch64_neon_fmaxnm:
18211 N->getOperand(1),
N->getOperand(2));
18212 case Intrinsic::aarch64_neon_fminnm:
18214 N->getOperand(1),
N->getOperand(2));
18215 case Intrinsic::aarch64_neon_smull:
18217 N->getOperand(1),
N->getOperand(2));
18218 case Intrinsic::aarch64_neon_umull:
18220 N->getOperand(1),
N->getOperand(2));
18221 case Intrinsic::aarch64_neon_pmull:
18223 N->getOperand(1),
N->getOperand(2));
18224 case Intrinsic::aarch64_neon_sqdmull:
18226 case Intrinsic::aarch64_neon_sqshl:
18227 case Intrinsic::aarch64_neon_uqshl:
18228 case Intrinsic::aarch64_neon_sqshlu:
18229 case Intrinsic::aarch64_neon_srshl:
18230 case Intrinsic::aarch64_neon_urshl:
18231 case Intrinsic::aarch64_neon_sshl:
18232 case Intrinsic::aarch64_neon_ushl:
18234 case Intrinsic::aarch64_neon_rshrn: {
18235 EVT VT =
N->getOperand(1).getValueType();
18238 DAG.
getConstant(1LLU << (
N->getConstantOperandVal(2) - 1),
DL, VT);
18245 case Intrinsic::aarch64_neon_sabd:
18247 N->getOperand(1),
N->getOperand(2));
18248 case Intrinsic::aarch64_neon_uabd:
18250 N->getOperand(1),
N->getOperand(2));
18251 case Intrinsic::aarch64_crc32b:
18252 case Intrinsic::aarch64_crc32cb:
18254 case Intrinsic::aarch64_crc32h:
18255 case Intrinsic::aarch64_crc32ch:
18257 case Intrinsic::aarch64_sve_saddv:
18259 if (
N->getOperand(2)->getValueType(0).getVectorElementType() ==
MVT::i64)
18263 case Intrinsic::aarch64_sve_uaddv:
18265 case Intrinsic::aarch64_sve_smaxv:
18267 case Intrinsic::aarch64_sve_umaxv:
18269 case Intrinsic::aarch64_sve_sminv:
18271 case Intrinsic::aarch64_sve_uminv:
18273 case Intrinsic::aarch64_sve_orv:
18275 case Intrinsic::aarch64_sve_eorv:
18277 case Intrinsic::aarch64_sve_andv:
18279 case Intrinsic::aarch64_sve_index:
18281 case Intrinsic::aarch64_sve_dup:
18283 case Intrinsic::aarch64_sve_dup_x:
18286 case Intrinsic::aarch64_sve_ext:
18288 case Intrinsic::aarch64_sve_mul:
18290 case Intrinsic::aarch64_sve_mul_u:
18292 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18293 case Intrinsic::aarch64_sve_smulh:
18295 case Intrinsic::aarch64_sve_smulh_u:
18297 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18298 case Intrinsic::aarch64_sve_umulh:
18300 case Intrinsic::aarch64_sve_umulh_u:
18302 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18303 case Intrinsic::aarch64_sve_smin:
18305 case Intrinsic::aarch64_sve_smin_u:
18307 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18308 case Intrinsic::aarch64_sve_umin:
18310 case Intrinsic::aarch64_sve_umin_u:
18312 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18313 case Intrinsic::aarch64_sve_smax:
18315 case Intrinsic::aarch64_sve_smax_u:
18317 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18318 case Intrinsic::aarch64_sve_umax:
18320 case Intrinsic::aarch64_sve_umax_u:
18322 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18323 case Intrinsic::aarch64_sve_lsl:
18325 case Intrinsic::aarch64_sve_lsl_u:
18327 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18328 case Intrinsic::aarch64_sve_lsr:
18330 case Intrinsic::aarch64_sve_lsr_u:
18332 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18333 case Intrinsic::aarch64_sve_asr:
18335 case Intrinsic::aarch64_sve_asr_u:
18337 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18338 case Intrinsic::aarch64_sve_fadd:
18340 case Intrinsic::aarch64_sve_fsub:
18342 case Intrinsic::aarch64_sve_fmul:
18344 case Intrinsic::aarch64_sve_add:
18346 case Intrinsic::aarch64_sve_add_u:
18349 case Intrinsic::aarch64_sve_sub:
18351 case Intrinsic::aarch64_sve_sub_u:
18354 case Intrinsic::aarch64_sve_subr:
18356 case Intrinsic::aarch64_sve_and:
18358 case Intrinsic::aarch64_sve_and_u:
18361 case Intrinsic::aarch64_sve_bic:
18363 case Intrinsic::aarch64_sve_bic_u:
18365 N->getOperand(2),
N->getOperand(3));
18366 case Intrinsic::aarch64_sve_eor:
18368 case Intrinsic::aarch64_sve_eor_u:
18371 case Intrinsic::aarch64_sve_orr:
18373 case Intrinsic::aarch64_sve_orr_u:
18376 case Intrinsic::aarch64_sve_sabd:
18378 case Intrinsic::aarch64_sve_sabd_u:
18380 N->getOperand(2),
N->getOperand(3));
18381 case Intrinsic::aarch64_sve_uabd:
18383 case Intrinsic::aarch64_sve_uabd_u:
18385 N->getOperand(2),
N->getOperand(3));
18386 case Intrinsic::aarch64_sve_sdiv_u:
18388 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18389 case Intrinsic::aarch64_sve_udiv_u:
18391 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18392 case Intrinsic::aarch64_sve_sqadd:
18394 case Intrinsic::aarch64_sve_sqsub:
18396 case Intrinsic::aarch64_sve_uqadd:
18398 case Intrinsic::aarch64_sve_uqsub:
18400 case Intrinsic::aarch64_sve_sqadd_x:
18402 N->getOperand(1),
N->getOperand(2));
18403 case Intrinsic::aarch64_sve_sqsub_x:
18405 N->getOperand(1),
N->getOperand(2));
18406 case Intrinsic::aarch64_sve_uqadd_x:
18408 N->getOperand(1),
N->getOperand(2));
18409 case Intrinsic::aarch64_sve_uqsub_x:
18411 N->getOperand(1),
N->getOperand(2));
18412 case Intrinsic::aarch64_sve_asrd:
18414 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18415 case Intrinsic::aarch64_sve_cmphs:
18416 if (!
N->getOperand(2).getValueType().isFloatingPoint())
18418 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18421 case Intrinsic::aarch64_sve_cmphi:
18422 if (!
N->getOperand(2).getValueType().isFloatingPoint())
18424 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18427 case Intrinsic::aarch64_sve_fcmpge:
18428 case Intrinsic::aarch64_sve_cmpge:
18430 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18433 case Intrinsic::aarch64_sve_fcmpgt:
18434 case Intrinsic::aarch64_sve_cmpgt:
18436 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18439 case Intrinsic::aarch64_sve_fcmpeq:
18440 case Intrinsic::aarch64_sve_cmpeq:
18442 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18445 case Intrinsic::aarch64_sve_fcmpne:
18446 case Intrinsic::aarch64_sve_cmpne:
18448 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18451 case Intrinsic::aarch64_sve_fcmpuo:
18453 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18456 case Intrinsic::aarch64_sve_fadda:
18458 case Intrinsic::aarch64_sve_faddv:
18460 case Intrinsic::aarch64_sve_fmaxnmv:
18462 case Intrinsic::aarch64_sve_fmaxv:
18464 case Intrinsic::aarch64_sve_fminnmv:
18466 case Intrinsic::aarch64_sve_fminv:
18468 case Intrinsic::aarch64_sve_sel:
18470 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
18471 case Intrinsic::aarch64_sve_cmpeq_wide:
18473 case Intrinsic::aarch64_sve_cmpne_wide:
18475 case Intrinsic::aarch64_sve_cmpge_wide:
18477 case Intrinsic::aarch64_sve_cmpgt_wide:
18479 case Intrinsic::aarch64_sve_cmplt_wide:
18481 case Intrinsic::aarch64_sve_cmple_wide:
18483 case Intrinsic::aarch64_sve_cmphs_wide:
18485 case Intrinsic::aarch64_sve_cmphi_wide:
18487 case Intrinsic::aarch64_sve_cmplo_wide:
18489 case Intrinsic::aarch64_sve_cmpls_wide:
18491 case Intrinsic::aarch64_sve_ptest_any:
18492 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18494 case Intrinsic::aarch64_sve_ptest_first:
18495 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18497 case Intrinsic::aarch64_sve_ptest_last:
18498 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
18505 unsigned OC =
N->getOpcode();
18521 const SDValue SetCC =
N->getOperand(0);
18543 SDLoc(SetCC),
N->getValueType(0), Ext1, Ext2,
18558 (
N->getOperand(0).getOpcode() ==
ISD::ABDU ||
18559 N->getOperand(0).getOpcode() ==
ISD::ABDS)) {
18560 SDNode *ABDNode =
N->getOperand(0).getNode();
18569 if (
N->getValueType(0).isFixedLengthVector() &&
18578 SDValue SplatVal,
unsigned NumVecElts) {
18597 if (BasePtr->getOpcode() ==
ISD::ADD &&
18598 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
18599 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
18600 BasePtr = BasePtr->getOperand(0);
18603 unsigned Offset = EltOffset;
18604 while (--NumVecElts) {
18620 assert(ContentTy.
isSimple() &&
"No SVE containers for extended types");
18649 EVT VT =
N->getValueType(0);
18654 EVT ContainerVT = VT;
18659 SDValue Ops[] = {
N->getOperand(0),
18667 if (ContainerVT.
isInteger() && (VT != ContainerVT))
18675 EVT VT =
N->getValueType(0);
18676 EVT PtrTy =
N->getOperand(3).getValueType();
18682 auto *MINode = cast<MemIntrinsicSDNode>(
N);
18685 MINode->getOperand(3), DAG.
getUNDEF(PtrTy),
18687 MINode->getMemoryVT(), MINode->getMemOperand(),
18698 template <
unsigned Opcode>
18702 "Unsupported opcode.");
18704 EVT VT =
N->getValueType(0);
18710 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(2),
N->getOperand(3)};
18723 EVT DataVT =
Data.getValueType();
18731 if (
Data.getValueType().isFloatingPoint())
18736 SDValue Ops[] = {
N->getOperand(0),
18750 EVT DataVT =
Data.getValueType();
18751 EVT PtrTy =
N->getOperand(4).getValueType();
18756 auto *MINode = cast<MemIntrinsicSDNode>(
N);
18759 MINode->getMemoryVT(), MINode->getMemOperand(),
18789 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
18791 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
18813 if (Offset < -512 || Offset > 504)
18817 for (
int I = 0;
I < NumVecElts; ++
I) {
18829 ZeroReg = AArch64::WZR;
18832 ZeroReg = AArch64::XZR;
18856 if (NumVecElts != 4 && NumVecElts != 2)
18867 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
18869 for (
unsigned I = 0;
I < NumVecElts; ++
I) {
18885 if (IndexVal >= NumVecElts)
18887 IndexNotInserted.reset(IndexVal);
18892 if (IndexNotInserted.any())
18903 if (
S->isVolatile() ||
S->isIndexed())
18916 return ReplacedZeroSplat;
18922 if (!Subtarget->isMisaligned128StoreSlow())
18940 S->getAlign() <=
Align(2))
18947 return ReplacedSplat;
18958 SDValue BasePtr =
S->getBasePtr();
18960 DAG.
getStore(
S->getChain(),
DL, SubVector0, BasePtr,
S->getPointerInfo(),
18961 S->getAlign(),
S->getMemOperand()->getFlags());
18965 S->getPointerInfo(),
S->getAlign(),
18966 S->getMemOperand()->getFlags());
18973 if (
N->getOperand(2).isUndef())
18974 return N->getOperand(1);
18983 "Unexpected Opcode!");
18986 if (
N->getOperand(0).isUndef())
18987 return DAG.
getUNDEF(
N->getValueType(0));
18992 if (
N->getOperand(0).getOpcode() ==
ISD::MLOAD &&
19003 unsigned PgPattern =
Mask->getConstantOperandVal(0);
19004 EVT VT =
N->getValueType(0);
19032 EVT ResVT =
N->getValueType(0);
19082 if (!IsLittleEndian)
19089 const unsigned Opcode = Operand.
getOpcode();
19098 SDValue SourceOp0 = getSourceOp(Op0);
19099 SDValue SourceOp1 = getSourceOp(Op1);
19101 if (!SourceOp0 || !SourceOp1)
19129 EVT BitcastResultTy;
19150 unsigned Opc =
N->getOpcode();
19156 "Invalid opcode.");
19174 EVT ResVT =
N->getValueType(0);
19176 const auto OffsetOpc =
Offset.getOpcode();
19177 const bool OffsetIsZExt =
19179 const bool OffsetIsSExt =
19183 if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
19185 VTSDNode *ExtFrom = cast<VTSDNode>(
Offset.getOperand(2).getNode());
19191 if (ExtPg == Pg && ExtFromEVT ==
MVT::i32) {
19199 {Chain, Pg,
Base, UnextendedOffset, Ty});
19215 unsigned OpScalarSize =
Op.getScalarValueSizeInBits();
19217 unsigned ShiftImm =
N->getConstantOperandVal(1);
19218 assert(OpScalarSize > ShiftImm &&
"Invalid shift imm");
19221 APInt DemandedMask = ~ShiftedOutBits;
19234 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
19236 SDValue CC =
N->getOperand(0)->getOperand(0);
19237 auto VT =
CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.
getContext());
19255 EVT VT =
N->getValueType(0);
19260 unsigned LoadIdx = IsLaneOp ? 1 : 0;
19261 SDNode *
LD =
N->getOperand(LoadIdx).getNode();
19269 Lane =
N->getOperand(2);
19270 auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
19285 if (UI.getUse().getResNo() == 1)
19295 Addr.getNode()->use_end(); UI != UE; ++UI) {
19298 || UI.getUse().getResNo() !=
Addr.getResNo())
19304 uint32_t IncVal = CInc->getZExtValue();
19306 if (IncVal != NumBytes)
19316 Worklist.push_back(
User);
19317 Worklist.push_back(
LD);
19318 Worklist.push_back(
Vector.getNode());
19324 Ops.push_back(
LD->getOperand(0));
19327 Ops.push_back(Lane);
19329 Ops.push_back(
Addr);
19330 Ops.push_back(Inc);
19372 "Expected STORE dag node in input!");
19374 if (
auto Store = dyn_cast<StoreSDNode>(
N)) {
19375 if (!
Store->isTruncatingStore() ||
Store->isIndexed())
19378 auto ExtOpCode =
Ext.getOpcode();
19386 Store->getBasePtr(),
Store->getMemOperand());
19403 EVT MemVT =
LD->getMemoryVT();
19426 for (
unsigned I = 0;
I < Num256Loads;
I++) {
19427 unsigned PtrOffset =
I * 32;
19432 NewVT,
DL, Chain, NewPtr,
LD->getPointerInfo().getWithOffset(PtrOffset),
19433 NewAlign,
LD->getMemOperand()->getFlags(),
LD->getAAInfo());
19435 LoadOpsChain.push_back(
SDValue(cast<SDNode>(NewLoad), 1));
19443 unsigned PtrOffset = (MemVT.
getSizeInBits() - BitsRemaining) / 8;
19451 DAG.
getLoad(RemainingVT,
DL, Chain, NewPtr,
19452 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
19453 LD->getMemOperand()->getFlags(),
LD->getAAInfo());
19456 SDValue ExtendedReminingLoad =
19458 {UndefVector, RemainingLoad, InsertIdx});
19459 LoadOps.push_back(ExtendedReminingLoad);
19460 LoadOpsChain.push_back(
SDValue(cast<SDNode>(RemainingLoad), 1));
19483 EVT ValueVT =
Value.getValueType();
19485 auto hasValidElementTypeForFPTruncStore = [](
EVT VT) {
19499 hasValidElementTypeForFPTruncStore(
Value.getOperand(0).getValueType()))
19501 ST->getMemoryVT(),
ST->getMemOperand());
19530 Value.getValueType().isInteger()) {
19535 EVT InVT =
Value.getOperand(0).getValueType();
19539 unsigned PgPattern =
Mask->getConstantOperandVal(0);
19564 EVT IndexVT =
Index.getValueType();
19593 SDValue OffsetOp = Add.getOperand(1);
19600 Add.getOperand(0), ShiftOp);
19616 bool Changed =
false;
19622 EVT IndexVT =
Index.getValueType();
19627 EVT DataVT =
N->getOperand(1).getValueType();
19640 int64_t Stride = 0;
19642 Stride = cast<ConstantSDNode>(
Index.getOperand(0))->getSExtValue();
19652 Stride = Step <<
Shift->getZExtValue();
19665 unsigned MaxVScale =
19667 int64_t LastElementOffset =
19684 assert(MGS &&
"Can only combine gather load or scatter store nodes");
19702 if (
auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
19709 auto *MSC = cast<MaskedScatterSDNode>(MGS);
19725 unsigned AddrOpIdx =
N->getNumOperands() - 1;
19730 UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
19733 UI.getUse().getResNo() !=
Addr.getResNo())
19741 Worklist.push_back(
N);
19742 Worklist.push_back(
User);
19748 bool IsStore =
false;
19749 bool IsLaneOp =
false;
19750 bool IsDupOp =
false;
19751 unsigned NewOpc = 0;
19752 unsigned NumVecs = 0;
19753 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
19757 NumVecs = 2;
break;
19759 NumVecs = 3;
break;
19761 NumVecs = 4;
break;
19763 NumVecs = 2; IsStore =
true;
break;
19765 NumVecs = 3; IsStore =
true;
break;
19767 NumVecs = 4; IsStore =
true;
break;
19769 NumVecs = 2;
break;
19771 NumVecs = 3;
break;
19773 NumVecs = 4;
break;
19775 NumVecs = 2; IsStore =
true;
break;
19777 NumVecs = 3; IsStore =
true;
break;
19779 NumVecs = 4; IsStore =
true;
break;
19781 NumVecs = 2; IsDupOp =
true;
break;
19783 NumVecs = 3; IsDupOp =
true;
break;
19785 NumVecs = 4; IsDupOp =
true;
break;
19787 NumVecs = 2; IsLaneOp =
true;
break;
19789 NumVecs = 3; IsLaneOp =
true;
break;
19791 NumVecs = 4; IsLaneOp =
true;
break;
19793 NumVecs = 2; IsStore =
true; IsLaneOp =
true;
break;
19795 NumVecs = 3; IsStore =
true; IsLaneOp =
true;
break;
19797 NumVecs = 4; IsStore =
true; IsLaneOp =
true;
break;
19802 VecTy =
N->getOperand(2).getValueType();
19804 VecTy =
N->getValueType(0);
19809 uint32_t IncVal = CInc->getZExtValue();
19811 if (IsLaneOp || IsDupOp)
19813 if (IncVal != NumBytes)
19818 Ops.push_back(
N->getOperand(0));
19820 if (IsLaneOp || IsStore)
19821 for (
unsigned i = 2;
i < AddrOpIdx; ++
i)
19822 Ops.push_back(
N->getOperand(
i));
19823 Ops.push_back(
Addr);
19824 Ops.push_back(Inc);
19828 unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
19830 for (
n = 0;
n < NumResultVecs; ++
n)
19842 std::vector<SDValue> NewResults;
19843 for (
unsigned i = 0;
i < NumResultVecs; ++
i) {
19846 NewResults.push_back(
SDValue(UpdN.
getNode(), NumResultVecs + 1));
19893 1LL << (width - 1);
19963 int CompConstant) {
19967 int MaxUInt = (1 << width);
19975 AddConstant -= (1 << (width-1));
19980 if ((AddConstant == 0) ||
19981 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
19982 (AddConstant >= 0 && CompConstant < 0) ||
19983 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
19988 if ((AddConstant == 0) ||
19989 (AddConstant >= 0 && CompConstant <= 0) ||
19990 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
19995 if ((AddConstant >= 0 && CompConstant < 0) ||
19996 (AddConstant <= 0 && CompConstant >= -1 &&
19997 CompConstant < AddConstant + MaxUInt))
20002 if ((AddConstant == 0) ||
20003 (AddConstant > 0 && CompConstant <= 0) ||
20004 (AddConstant < 0 && CompConstant <= AddConstant))
20009 if ((AddConstant >= 0 && CompConstant <= 0) ||
20010 (AddConstant <= 0 && CompConstant >= 0 &&
20011 CompConstant <= AddConstant + MaxUInt))
20016 if ((AddConstant > 0 && CompConstant < 0) ||
20017 (AddConstant < 0 && CompConstant >= 0 &&
20018 CompConstant < AddConstant + MaxUInt) ||
20019 (AddConstant >= 0 && CompConstant >= 0 &&
20020 CompConstant >= AddConstant) ||
20021 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
20040 unsigned CCIndex,
unsigned CmpIndex,
20069 N->getOperand(CCIndex)->getValueType(0));
20077 assert((CCIndex == 2 && CmpIndex == 3) &&
20078 "Expected CCIndex to be 2 and CmpIndex to be 3.");
20079 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1), AArch64_CC,
20081 return DAG.
getNode(
N->getOpcode(),
N,
N->getVTList(), Ops);
20088 unsigned CmpIndex) {
20089 unsigned CC = cast<ConstantSDNode>(
N->getOperand(CCIndex))->getSExtValue();
20090 SDNode *SubsNode =
N->getOperand(CmpIndex).getNode();
20091 unsigned CondOpcode = SubsNode->
getOpcode();
20100 unsigned MaskBits = 0;
20110 uint32_t CNV = CN->getZExtValue();
20113 else if (CNV == 65535)
20134 if (!isa<ConstantSDNode>(AddInputValue2.
getNode()) ||
20135 !isa<ConstantSDNode>(SubsInputValue.
getNode()))
20146 cast<ConstantSDNode>(AddInputValue2.
getNode())->getSExtValue(),
20147 cast<ConstantSDNode>(SubsInputValue.
getNode())->getSExtValue()))
20180 assert(isa<ConstantSDNode>(CCVal) &&
"Expected a ConstantSDNode here!");
20181 unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
20185 unsigned CmpOpc = Cmp.getOpcode();
20191 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
20198 "Expected the value type to be the same for both operands!");
20226 unsigned CC =
N->getConstantOperandVal(2);
20231 Zero =
N->getOperand(0);
20232 CTTZ =
N->getOperand(1);
20234 Zero =
N->getOperand(1);
20235 CTTZ =
N->getOperand(0);
20245 "Illegal type in CTTZ folding");
20251 ?
CTTZ.getOperand(0).getOperand(0)
20252 :
CTTZ.getOperand(0);
20254 if (
X !=
SUBS.getOperand(0))
20258 ?
CTTZ.getOperand(0).getValueSizeInBits()
20259 :
CTTZ.getValueSizeInBits();
20293 if (!isa<ConstantSDNode>(
X) || !isa<ConstantSDNode>(
Y) ||
X ==
Y) {
20311 else if (CmpRHS !=
X)
20320 EVT VT =
Op->getValueType(0);
20331 if (
N->getOperand(0) ==
N->getOperand(1))
20332 return N->getOperand(0);
20349 EVT Op0MVT =
Op->getOperand(0).getValueType();
20355 SDNode *FirstUse = *
Op->use_begin();
20362 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
20377 Op->getOperand(0));
20379 Op->getOperand(0));
20381 Op0ExtV =
SDValue(Op0SExt, 0);
20384 Op0ExtV =
SDValue(Op0ZExt, 0);
20390 Op0ExtV, Op1ExtV,
Op->getOperand(2));
20401 EVT VT =
N->getValueType(0);
20412 auto *OpCC = cast<ConstantSDNode>(
LHS.getOperand(2));
20420 LHS.getOperand(3));
20426 LHS->getOpcode() ==
ISD::SRL && isa<ConstantSDNode>(
LHS->getOperand(1)) &&
20429 EVT TstVT =
LHS->getValueType(0);
20432 uint64_t TstImm = -1ULL <<
LHS->getConstantOperandVal(1);
20444 EVT ToVT =
LHS->getValueType(0);
20445 EVT FromVT =
LHS->getOperand(0).getValueType();
20465 unsigned GenericOpcode) {
20469 EVT VT =
N->getValueType(0);
20472 if (!
N->hasAnyUseOfValue(1)) {
20527 "Unexpected opcode!");
20540 LHS->getOperand(0)->getValueType(0) ==
N->getValueType(0)) {
20545 LHS->getOperand(0)->getOperand(0) == Pred)
20546 return LHS->getOperand(0);
20552 return LHS->getOperand(0);
20561 LHS->getOperand(0), Pred);
20574 if (!
Op->hasOneUse())
20584 Bit < Op->getValueType(0).getSizeInBits()) {
20590 Bit < Op->getOperand(0).getValueSizeInBits()) {
20594 if (
Op->getNumOperands() != 2)
20597 auto *
C = dyn_cast<ConstantSDNode>(
Op->getOperand(1));
20601 switch (
Op->getOpcode()) {
20607 if ((
C->getZExtValue() >>
Bit) & 1)
20613 if (
C->getZExtValue() <=
Bit &&
20614 (
Bit -
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
20615 Bit =
Bit -
C->getZExtValue();
20622 Bit =
Bit +
C->getZExtValue();
20623 if (
Bit >=
Op->getValueType(0).getSizeInBits())
20624 Bit =
Op->getValueType(0).getSizeInBits() - 1;
20629 if ((
Bit +
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
20630 Bit =
Bit +
C->getZExtValue();
20637 if ((
C->getZExtValue() >>
Bit) & 1)
20647 unsigned Bit = cast<ConstantSDNode>(
N->getOperand(2))->getZExtValue();
20648 bool Invert =
false;
20649 SDValue TestSrc =
N->getOperand(1);
20652 if (TestSrc == NewTestSrc)
20655 unsigned NewOpc =
N->getOpcode();
20676 auto SelectA =
N->getOperand(1);
20677 auto SelectB =
N->getOperand(2);
20678 auto NTy =
N->getValueType(0);
20680 if (!NTy.isScalableVector())
20686 switch (SelectB.getOpcode()) {
20694 if (SelectA != SelectB.getOperand(0))
20700 auto InverseSetCC =
20705 {InverseSetCC, SelectB, SelectA});
20721 return N->getOperand(1);
20724 return N->getOperand(2);
20735 SDNode *SplatLHS =
N->getOperand(1).getNode();
20736 SDNode *SplatRHS =
N->getOperand(2).getNode();
20738 if (CmpLHS.
getValueType() ==
N->getOperand(1).getValueType() &&
20763 EVT ResVT =
N->getValueType(0);
20770 SDValue IfTrue =
N->getOperand(1);
20771 SDValue IfFalse =
N->getOperand(2);
20774 cast<CondCodeSDNode>(N0.
getOperand(2))->get());
20787 EVT ResVT =
N->getValueType(0);
20799 "Scalar-SETCC feeding SELECT has unexpected result type!");
20812 if (!ResVT.
isVector() || NumMaskElts == 0)
20848 EVT VT =
N->getValueType(0);
20854 N->getOpcode(), DCI.
DAG.
getVTList(LVT), {N->getOperand(0)})) {
20866 if (
N->getValueType(0) ==
N->getOperand(0).getValueType())
20867 return N->getOperand(0);
20878 auto *GN = cast<GlobalAddressSDNode>(
N);
20887 auto *
C = dyn_cast<ConstantSDNode>(
N->getOperand(0));
20889 C = dyn_cast<ConstantSDNode>(
N->getOperand(1));
20892 MinOffset =
std::min(MinOffset,
C->getZExtValue());
20911 if (
Offset >= (1 << 20))
20916 if (!
T->isSized() ||
20930 !
BR.getValueType().isScalarInteger())
20942 "This method is only for scalable vectors of offsets");
20958 unsigned ScalarSizeInBytes) {
20960 if (OffsetInBytes % ScalarSizeInBytes)
20964 if (OffsetInBytes / ScalarSizeInBytes > 31)
20978 unsigned ScalarSizeInBytes) {
20986 bool OnlyPackedOffsets =
true) {
20987 const SDValue Src =
N->getOperand(2);
20988 const EVT SrcVT = Src->getValueType(0);
20990 "Scatter stores are only possible for SVE vectors");
21047 if (!TLI.isTypeLegal(
Base.getValueType()))
21053 if (!OnlyPackedOffsets &&
21057 if (!TLI.isTypeLegal(
Offset.getValueType()))
21073 if (Src.getValueType().isFloatingPoint())
21078 SDValue Ops[] = {
N->getOperand(0),
21085 return DAG.
getNode(Opcode,
DL, VTs, Ops);
21090 bool OnlyPackedOffsets =
true) {
21091 const EVT RetVT =
N->getValueType(0);
21093 "Gather loads are only possible for SVE vectors");
21123 Offset.getValueType().isVector())
21150 if (!TLI.isTypeLegal(
Base.getValueType()))
21156 if (!OnlyPackedOffsets &&
21171 SDValue Ops[] = {
N->getOperand(0),
21178 if (RetVT.
isInteger() && (RetVT != HwRetVt))
21194 unsigned Opc = Src->getOpcode();
21210 SDValue ExtOp = Src->getOperand(0);
21211 auto VT = cast<VTSDNode>(
N->getOperand(1))->getVT();
21216 "Sign extending from an invalid type");
21235 unsigned MemVTOpNum = 4;
21298 EVT SignExtSrcVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
21299 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
21301 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
21304 EVT DstVT =
N->getValueType(0);
21308 for (
unsigned I = 0;
I < Src->getNumOperands(); ++
I)
21309 Ops.push_back(Src->getOperand(
I));
21323 const unsigned OffsetPos = 4;
21335 Ops[OffsetPos] =
Offset;
21346 unsigned ScalarSizeInBytes) {
21347 const unsigned ImmPos = 4, OffsetPos = 3;
21354 std::swap(Ops[ImmPos], Ops[OffsetPos]);
21358 Ops[1] = DAG.
getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index,
DL,
21367 switch (
Op.getOpcode()) {
21391 SDValue InsertVec =
N->getOperand(0);
21392 SDValue InsertElt =
N->getOperand(1);
21393 SDValue InsertIdx =
N->getOperand(2);
21434 EVT Ty =
N->getValueType(0);
21459 EVT VT =
N->getValueType(0);
21465 auto hasValidElementTypeForFPExtLoad = [](
EVT VT) {
21495 EVT VT =
N->getValueType(0);
21498 if (!VT.
isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
21514 EVT VT =
N->getValueType(0);
21520 if (!
Insert.getOperand(0).isUndef())
21524 uint64_t IdxDupLane =
N->getConstantOperandVal(1);
21525 if (IdxInsert != 0 || IdxDupLane != 0)
21544 NewInsert,
N->getOperand(1));
21551 switch (
N->getOpcode()) {
21689 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
21690 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
21692 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
21694 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
21696 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
21698 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
21699 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
21700 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
21701 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
21702 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
21703 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
21704 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
21705 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
21707 case Intrinsic::aarch64_neon_ld2:
21708 case Intrinsic::aarch64_neon_ld3:
21709 case Intrinsic::aarch64_neon_ld4:
21710 case Intrinsic::aarch64_neon_ld1x2:
21711 case Intrinsic::aarch64_neon_ld1x3:
21712 case Intrinsic::aarch64_neon_ld1x4:
21713 case Intrinsic::aarch64_neon_ld2lane:
21714 case Intrinsic::aarch64_neon_ld3lane:
21715 case Intrinsic::aarch64_neon_ld4lane:
21716 case Intrinsic::aarch64_neon_ld2r:
21717 case Intrinsic::aarch64_neon_ld3r:
21718 case Intrinsic::aarch64_neon_ld4r:
21719 case Intrinsic::aarch64_neon_st2:
21720 case Intrinsic::aarch64_neon_st3:
21721 case Intrinsic::aarch64_neon_st4:
21722 case Intrinsic::aarch64_neon_st1x2:
21723 case Intrinsic::aarch64_neon_st1x3:
21724 case Intrinsic::aarch64_neon_st1x4:
21725 case Intrinsic::aarch64_neon_st2lane:
21726 case Intrinsic::aarch64_neon_st3lane:
21727 case Intrinsic::aarch64_neon_st4lane:
21729 case Intrinsic::aarch64_sve_ldnt1:
21731 case Intrinsic::aarch64_sve_ld1rq:
21732 return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(
N, DAG);
21733 case Intrinsic::aarch64_sve_ld1ro:
21734 return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(
N, DAG);
21735 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
21737 case Intrinsic::aarch64_sve_ldnt1_gather:
21739 case Intrinsic::aarch64_sve_ldnt1_gather_index:
21742 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
21744 case Intrinsic::aarch64_sve_ld1:
21746 case Intrinsic::aarch64_sve_ldnf1:
21748 case Intrinsic::aarch64_sve_ldff1:
21750 case Intrinsic::aarch64_sve_st1:
21752 case Intrinsic::aarch64_sve_stnt1:
21754 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
21756 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
21758 case Intrinsic::aarch64_sve_stnt1_scatter:
21760 case Intrinsic::aarch64_sve_stnt1_scatter_index:
21762 case Intrinsic::aarch64_sve_ld1_gather:
21764 case Intrinsic::aarch64_sve_ld1_gather_index:
21767 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
21770 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
21773 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
21777 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
21781 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
21783 case Intrinsic::aarch64_sve_ldff1_gather:
21785 case Intrinsic::aarch64_sve_ldff1_gather_index:
21788 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
21792 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
21796 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
21800 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
21804 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
21807 case Intrinsic::aarch64_sve_st1_scatter:
21809 case Intrinsic::aarch64_sve_st1_scatter_index:
21811 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
21814 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
21817 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
21821 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
21825 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
21827 case Intrinsic::aarch64_rndr:
21828 case Intrinsic::aarch64_rndrrs: {
21829 unsigned IntrinsicID =
21830 cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
21832 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
21833 : AArch64SysReg::RNDRRS);
21861 bool AArch64TargetLowering::isUsedByReturnOnly(
SDNode *
N,
21863 if (
N->getNumValues() != 1)
21865 if (!
N->hasNUsesOfValue(1, 0))
21869 SDNode *Copy = *
N->use_begin();
21873 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
21876 TCChain = Copy->getOperand(0);
21880 bool HasRet =
false;
21881 for (
SDNode *Node : Copy->uses()) {
21898 bool AArch64TargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
21902 bool AArch64TargetLowering::getIndexedAddressParts(
SDNode *
N,
SDNode *
Op,
21910 SDNode *ValOnlyUser =
nullptr;
21913 if (UI.getUse().getResNo() == 1)
21915 if (ValOnlyUser ==
nullptr)
21918 ValOnlyUser =
nullptr;
21923 auto IsUndefOrZero = [](
SDValue V) {
21932 IsUndefOrZero(ValOnlyUser->
getOperand(2)))))
21935 Base =
Op->getOperand(0);
21939 int64_t RHSC =
RHS->getSExtValue();
21942 if (!isInt<9>(RHSC))
21952 bool AArch64TargetLowering::getPreIndexedAddressParts(
SDNode *
N,
SDValue &Base,
21959 VT =
LD->getMemoryVT();
21960 Ptr =
LD->getBasePtr();
21962 VT =
ST->getMemoryVT();
21963 Ptr =
ST->getBasePtr();
21967 if (!getIndexedAddressParts(
N,
Ptr.getNode(), Base,
Offset, DAG))
21973 bool AArch64TargetLowering::getPostIndexedAddressParts(
21979 VT =
LD->getMemoryVT();
21980 Ptr =
LD->getBasePtr();
21982 VT =
ST->getMemoryVT();
21983 Ptr =
ST->getBasePtr();
21987 if (!getIndexedAddressParts(
N,
Op, Base,
Offset, DAG))
21997 void AArch64TargetLowering::ReplaceBITCASTResults(
22001 EVT VT =
N->getValueType(0);
22002 EVT SrcVT =
Op.getValueType();
22006 "Expected fp->int bitcast!");
22036 EVT VT =
N->getValueType(0);
22039 !
N->getFlags().hasAllowReassociation()) ||
22044 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(
N->getOperand(1));
22046 Shuf = dyn_cast<ShuffleVectorSDNode>(
N->getOperand(0));
22047 X =
N->getOperand(1);
22052 if (Shuf->getOperand(0) !=
X || !Shuf->getOperand(1)->isUndef())
22057 for (
int I = 0,
E =
Mask.size();
I <
E;
I++)
22058 if (
Mask[
I] != (
I % 2 == 0 ?
I + 1 :
I - 1))
22063 assert(LoHi.first.getValueType() == LoHi.second.getValueType());
22065 LoHi.first, LoHi.second);
22070 NMask.push_back(
I);
22071 NMask.push_back(
I);
22076 DAG.
getUNDEF(LoHi.first.getValueType())),
22083 unsigned AcrossOp) {
22100 return std::make_pair(Lo, Hi);
22103 void AArch64TargetLowering::ReplaceExtractSubVectorResults(
22106 EVT InVT =
In.getValueType();
22113 EVT VT =
N->getValueType(0);
22122 auto *CIndex = dyn_cast<ConstantSDNode>(
N->getOperand(1));
22126 unsigned Index = CIndex->getZExtValue();
22150 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
22160 "AtomicCmpSwap on types less than 128 should be legal");
22163 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
22174 switch (
MemOp->getMergedOrdering()) {
22176 Opcode = AArch64::CASPX;
22179 Opcode = AArch64::CASPAX;
22182 Opcode = AArch64::CASPLX;
22186 Opcode = AArch64::CASPALX;
22196 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
22210 switch (
MemOp->getMergedOrdering()) {
22212 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
22215 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
22218 Opcode = AArch64::CMP_SWAP_128_RELEASE;
22222 Opcode = AArch64::CMP_SWAP_128;
22230 SDValue Ops[] = {
N->getOperand(1), Desired.first, Desired.second,
22231 New.first, New.second,
N->getOperand(0)};
22249 "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
22255 switch (Ordering) {
22257 return AArch64::LDCLRP;
22260 return AArch64::LDCLRPA;
22263 return AArch64::LDCLRPL;
22267 return AArch64::LDCLRPAL;
22275 switch (Ordering) {
22277 return AArch64::LDSETP;
22280 return AArch64::LDSETPA;
22283 return AArch64::LDSETPL;
22287 return AArch64::LDSETPAL;
22295 switch (Ordering) {
22297 return AArch64::SWPP;
22300 return AArch64::SWPPA;
22303 return AArch64::SWPPL;
22307 return AArch64::SWPPAL;
22330 "AtomicLoadXXX on types less than 128 should be legal");
22332 if (!Subtarget->hasLSE128())
22336 const SDValue &Chain =
N->getOperand(0);
22338 const SDValue &Val128 =
N->getOperand(2);
22339 std::pair<SDValue, SDValue> Val2x64 =
splitInt128(Val128, DAG);
22341 const unsigned ISDOpcode =
N->getOpcode();
22342 const unsigned MachineOpcode =
22355 SDValue Ops[] = {Val2x64.first, Val2x64.second,
Ptr, Chain};
22373 void AArch64TargetLowering::ReplaceNodeResults(
22375 switch (
N->getOpcode()) {
22379 ReplaceBITCASTResults(
N,
Results, DAG);
22420 assert(
N->getValueType(0) ==
MVT::i128 &&
"unexpected illegal conversion");
22428 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
22434 "Expected 128-bit atomicrmw.");
22454 DAG.
getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
22455 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
22457 {LoadNode->getChain(), LoadNode->getBasePtr()},
22474 auto *AN = dyn_cast<AtomicSDNode>(LoadNode);
22475 bool isLoadAcquire =
22480 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
22484 {LoadNode->getChain(), LoadNode->getBasePtr()},
22494 ReplaceExtractSubVectorResults(
N,
Results, DAG);
22503 EVT VT =
N->getValueType(0);
22505 "custom lowering for unexpected type");
22512 case Intrinsic::aarch64_sve_clasta_n: {
22516 N->getOperand(1), Op2,
N->getOperand(3));
22520 case Intrinsic::aarch64_sve_clastb_n: {
22524 N->getOperand(1), Op2,
N->getOperand(3));
22528 case Intrinsic::aarch64_sve_lasta: {
22531 N->getOperand(1),
N->getOperand(2));
22535 case Intrinsic::aarch64_sve_lastb: {
22538 N->getOperand(1),
N->getOperand(2));
22547 "READ_REGISTER custom lowering is only for 128-bit sysregs");
22549 SDValue SysRegName =
N->getOperand(1);
22553 Chain, SysRegName);
22572 unsigned AArch64TargetLowering::combineRepeatedFPDivisors()
const {
22592 if (!Subtarget->hasLSE2())
22595 if (
auto LI = dyn_cast<LoadInst>(
I))
22596 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
22597 LI->getAlign() >=
Align(16);
22599 if (
auto SI = dyn_cast<StoreInst>(
I))
22600 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
22607 if (!Subtarget->hasLSE128())
22612 if (
const auto *
SI = dyn_cast<StoreInst>(
I))
22613 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
22614 SI->getAlign() >=
Align(16) &&
22618 if (
const auto *RMW = dyn_cast<AtomicRMWInst>(
I))
22619 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
22620 RMW->getAlign() >=
Align(16) &&
22629 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
22632 if (
auto LI = dyn_cast<LoadInst>(
I))
22633 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
22634 LI->getAlign() >=
Align(16) &&
22637 if (
auto SI = dyn_cast<StoreInst>(
I))
22638 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
22639 SI->getAlign() >=
Align(16) &&
22665 switch (
I->getOpcode()) {
22668 case Instruction::AtomicCmpXchg:
22669 return cast<AtomicCmpXchgInst>(
I)->getSuccessOrdering() ==
22671 case Instruction::AtomicRMW:
22672 return cast<AtomicRMWInst>(
I)->getOrdering() ==
22675 return cast<StoreInst>(
I)->getOrdering() ==
22685 unsigned Size =
SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
22735 bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
22745 if (Subtarget->hasLSE())
22747 if (Subtarget->outlineAtomics()) {
22770 Subtarget->hasLSE())
22780 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
22810 IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
22816 Value *Lo =
Builder.CreateExtractValue(LoHi, 0,
"lo");
22817 Value *Hi =
Builder.CreateExtractValue(LoHi, 1,
"hi");
22818 Lo =
Builder.CreateZExt(Lo, ValueTy,
"lo64");
22819 Hi =
Builder.CreateZExt(Hi, ValueTy,
"hi64");
22824 Type *Tys[] = {
Addr->getType() };
22826 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
22836 return Builder.CreateBitCast(Trunc, ValueTy);
22856 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
22867 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
22868 Type *Tys[] = {
Addr->getType() };
22873 Val =
Builder.CreateBitCast(Val, IntValTy);
22876 Stxr, {
Builder.CreateZExtOrBitCast(
22880 Attribute::ElementType, Val->
getType()));
22898 bool AArch64TargetLowering::shouldNormalizeToSelectSequence(
LLVMContext &,
22932 M.getOrInsertGlobal(
"__security_cookie",
22941 F->addParamAttr(0, Attribute::AttrKind::InReg);
22951 return M.getGlobalVariable(
"__security_cookie");
22988 return Mask->getValue().isPowerOf2();
22994 unsigned OldShiftOpcode,
unsigned NewShiftOpcode,
22998 X,
XC,
CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG))
23001 return X.getValueType().isScalarInteger() || NewShiftOpcode ==
ISD::SHL;
23024 const MCPhysReg *IStart =
TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
23033 if (AArch64::GPR64RegClass.
contains(*
I))
23034 RC = &AArch64::GPR64RegClass;
23035 else if (AArch64::FPR64RegClass.
contains(*
I))
23036 RC = &AArch64::FPR64RegClass;
23046 assert(Entry->getParent()->getFunction().hasFnAttribute(
23047 Attribute::NoUnwind) &&
23048 "Function should be nounwind in insertCopiesSplitCSR!");
23049 Entry->addLiveIn(*
I);
23054 for (
auto *Exit : Exits)
23056 TII->get(TargetOpcode::COPY), *
I)
23069 bool OptSize = Attr.
hasFnAttr(Attribute::MinSize);
23082 if (FPVT ==
MVT::v8f16 && !Subtarget->hasFullFP16())
23099 void AArch64TargetLowering::finalizeLowering(
MachineFunction &MF)
const {
23127 bool AArch64TargetLowering::shouldLocalize(
23129 auto &MF = *
MI.getMF();
23131 auto maxUses = [](
unsigned RematCost) {
23133 if (RematCost == 1)
23135 if (RematCost == 2)
23144 switch (
MI.getOpcode()) {
23145 case TargetOpcode::G_GLOBAL_VALUE: {
23154 case TargetOpcode::G_CONSTANT: {
23155 auto *CI =
MI.getOperand(1).getCImm();
23159 assert(
Cost.isValid() &&
"Expected a valid imm cost");
23161 unsigned RematCost = *
Cost.getValue();
23163 unsigned MaxUses = maxUses(RematCost);
23172 case AArch64::G_ADD_LOW:
23181 if (isa<ScalableVectorType>(Inst.
getType()))
23188 if (
const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
23189 if (isa<ScalableVectorType>(AI->getAllocatedType()))
23194 if (
auto *
Base = dyn_cast<CallBase>(&Inst)) {
23197 if (CallerAttrs.requiresSMChange(CalleeAttrs,
23199 CallerAttrs.requiresLazySave(CalleeAttrs))
23209 "Expected legal fixed length vector!");
23235 "Expected legal fixed length vector!");
23237 std::optional<unsigned> PgPattern =
23239 assert(PgPattern &&
"Unexpected element count for SVE predicate");
23246 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
23247 if (MaxSVESize && MinSVESize == MaxSVESize &&
23272 return getPTrue(DAG,
DL, MaskVT, *PgPattern);
23278 "Expected legal scalable vector!");
23293 "Expected to convert into a scalable vector!");
23295 "Expected a fixed length vector operand!");
23304 "Expected to convert into a fixed length vector!");
23306 "Expected a scalable vector operand!");
23313 SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
23315 auto Load = cast<LoadSDNode>(
Op);
23318 EVT VT =
Op.getValueType();
23320 EVT LoadVT = ContainerVT;
23321 EVT MemVT =
Load->getMemoryVT();
23331 LoadVT,
DL,
Load->getChain(),
Load->getBasePtr(),
Load->getOffset(), Pg,
23333 Load->getAddressingMode(),
Load->getExtensionType());
23338 Load->getMemoryVT().getVectorElementType());
23340 Result = getSVESafeBitCast(ExtendVT, Result, DAG);
23342 Pg, Result, DAG.
getUNDEF(ContainerVT));
23355 EVT InVT =
Mask.getValueType();
23367 {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
23371 SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
23373 auto Load = cast<MaskedLoadSDNode>(
Op);
23376 EVT VT =
Op.getValueType();
23382 bool IsPassThruZeroOrUndef =
false;
23384 if (
Load->getPassThru()->isUndef()) {
23385 PassThru = DAG.
getUNDEF(ContainerVT);
23386 IsPassThruZeroOrUndef =
true;
23393 IsPassThruZeroOrUndef =
true;
23397 ContainerVT,
DL,
Load->getChain(),
Load->getBasePtr(),
Load->getOffset(),
23398 Mask, PassThru,
Load->getMemoryVT(),
Load->getMemOperand(),
23399 Load->getAddressingMode(),
Load->getExtensionType());
23402 if (!IsPassThruZeroOrUndef) {
23414 SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
23416 auto Store = cast<StoreSDNode>(
Op);
23419 EVT VT =
Store->getValue().getValueType();
23428 Store->getMemoryVT().getVectorElementType());
23442 Store->getBasePtr(),
Store->getOffset(), Pg, MemVT,
23443 Store->getMemOperand(),
Store->getAddressingMode(),
23444 Store->isTruncatingStore());
23447 SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
23449 auto *
Store = cast<MaskedStoreSDNode>(
Op);
23452 EVT VT =
Store->getValue().getValueType();
23461 Store->getAddressingMode(),
Store->isTruncatingStore());
23464 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
23467 EVT VT =
Op.getValueType();
23492 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
23508 auto HalveAndExtendVector = [&DAG, &dl, &HalfVT, &PromVT,
23515 return std::pair<SDValue, SDValue>(
23516 {DAG.
getNode(ExtendOpcode, dl, PromVT, Lo),
23517 DAG.
getNode(ExtendOpcode, dl, PromVT, Hi)});
23521 auto [Op0LoExt, Op0HiExt] = HalveAndExtendVector(
Op.getOperand(0));
23522 auto [Op1LoExt, Op1HiExt] = HalveAndExtendVector(
Op.getOperand(1));
23530 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
23532 EVT VT =
Op.getValueType();
23566 SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
23568 EVT VT =
Op.getValueType();
23602 SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
23604 EVT VT =
Op.getValueType();
23605 EVT InVT =
Op.getOperand(0).getValueType();
23615 SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
23617 EVT VT =
Op.getValueType();
23621 EVT InVT =
Op.getOperand(0).getValueType();
23626 Op.getOperand(1),
Op.getOperand(2));
23636 unsigned NewOp)
const {
23637 EVT VT =
Op.getValueType();
23647 for (
const SDValue &V :
Op->op_values()) {
23648 if (isa<CondCodeSDNode>(V)) {
23653 if (
const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
23661 "Expected only legal fixed-width types");
23675 for (
const SDValue &V :
Op->op_values()) {
23678 "Only scalable vectors are supported!");
23693 EVT VT =
Op.getValueType();
23695 "Only expected to lower fixed length vector operation!");
23700 for (
const SDValue &V :
Op->op_values()) {
23701 assert(!isa<VTSDNode>(V) &&
"Unexpected VTSDNode node!");
23712 "Only fixed length vectors are supported!");
23716 auto ScalableRes = DAG.
getNode(
Op.getOpcode(),
SDLoc(
Op), ContainerVT, Ops);
23720 SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(
SDValue ScalarOp,
23728 EVT ContainerVT = SrcVT;
23739 DAG.
getUNDEF(ContainerVT), AccOp, Zero);
23748 SDValue AArch64TargetLowering::LowerPredReductionToSVE(
SDValue ReduceOp,
23752 EVT OpVT =
Op.getValueType();
23791 SDValue AArch64TargetLowering::LowerReductionToSVE(
unsigned Opcode,
23825 AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(
SDValue Op,
23827 EVT VT =
Op.getValueType();
23830 EVT InVT =
Op.getOperand(1).getValueType();
23837 EVT MaskVT =
Op.getOperand(0).getValueType();
23849 SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
23852 EVT InVT =
Op.getOperand(0).getValueType();
23856 "Only expected to lower fixed length vector operation!");
23858 "Expected integer result of the same bit length as the inputs!");
23866 {Pg, Op1, Op2,
Op.getOperand(2)});
23874 AArch64TargetLowering::LowerFixedLengthBitcastToSVE(
SDValue Op,
23877 auto SrcOp =
Op.getOperand(0);
23878 EVT VT =
Op.getValueType();
23880 EVT ContainerSrcVT =
23888 SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
23891 unsigned NumOperands =
Op->getNumOperands();
23894 "Unexpected number of operands in CONCAT_VECTORS");
23896 auto SrcOp1 =
Op.getOperand(0);
23897 auto SrcOp2 =
Op.getOperand(1);
23898 EVT VT =
Op.getValueType();
23899 EVT SrcVT = SrcOp1.getValueType();
23901 if (NumOperands > 2) {
23904 for (
unsigned I = 0;
I < NumOperands;
I += 2)
23906 Op->getOperand(
I),
Op->getOperand(
I + 1)));
23923 AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(
SDValue Op,
23925 EVT VT =
Op.getValueType();
23940 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
23942 Pg, Val, DAG.
getUNDEF(ContainerVT));
23948 AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(
SDValue Op,
23950 EVT VT =
Op.getValueType();
23972 AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(
SDValue Op,
23974 EVT VT =
Op.getValueType();
23997 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
24007 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
24016 AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(
SDValue Op,
24018 EVT VT =
Op.getValueType();
24040 Val = getSVESafeBitCast(CvtVT, Val, DAG);
24041 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
24058 SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
24060 EVT VT =
Op.getValueType();
24063 auto *SVN = cast<ShuffleVectorSDNode>(
Op.getNode());
24064 auto ShuffleMask = SVN->
getMask();
24074 auto MinLegalExtractEltScalarTy = [](
EVT ScalarTy) ->
EVT {
24089 bool ReverseEXT =
false;
24103 for (
unsigned LaneSize : {64U, 32U, 16U}) {
24104 if (
isREVMask(ShuffleMask, VT, LaneSize)) {
24111 else if (EltSz == 16)
24117 Op = LowerToPredicatedOp(
Op, DAG, RevOp);
24135 unsigned WhichResult;
24136 if (
isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
24140 if (
isTRNMask(ShuffleMask, VT, WhichResult)) {
24143 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op2));
24153 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op1));
24176 if (MinSVESize == MaxSVESize && MaxSVESize == VT.
getSizeInBits()) {
24182 if (
isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
24186 if (
isUZPMask(ShuffleMask, VT, WhichResult)) {
24189 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op2));
24199 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op1));
24209 EVT InVT =
Op.getValueType();
24213 "Only expect to cast between legal scalable vector types!");
24216 "For predicate bitcasts, use getSVEPredicateBitCast");
24232 VT == PackedVT || InVT == PackedInVT) &&
24233 "Unexpected bitcast!");
24236 if (InVT != PackedInVT)
24242 if (VT != PackedVT)
24257 bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
24259 const APInt &OriginalDemandedElts,
KnownBits &Known, TargetLoweringOpt &TLO,
24260 unsigned Depth)
const {
24262 unsigned Opc =
Op.getOpcode();
24279 if (ShiftRBits != ShiftLBits)
24282 unsigned ScalarSize =
Op.getScalarValueSizeInBits();
24283 assert(ScalarSize > ShiftLBits &&
"Invalid shift imm");
24286 APInt UnusedBits = ~OriginalDemandedBits;
24288 if ((ZeroBits & UnusedBits) != ZeroBits)
24298 if (!MaxSVEVectorSizeInBits)
24300 unsigned MaxElements = MaxSVEVectorSizeInBits / *ElementSize;
24316 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
24319 bool AArch64TargetLowering::isTargetCanonicalConstantNode(
SDValue Op)
const {
24327 bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
24328 unsigned Opc,
LLT Ty1,
LLT Ty2)
const {
24333 return Subtarget->hasComplxNum();
24338 auto *VTy = dyn_cast<FixedVectorType>(Ty);
24343 unsigned NumElements = VTy->getNumElements();
24349 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
24350 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
24356 Value *Accumulator)
const {
24364 "Vector type must be either 64 or a power of 2 that is at least 128");
24366 if (TyWidth > 128) {
24371 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
24373 auto *LowerSplitA =
B.CreateShuffleVector(InputA, LowerSplitMask);
24374 auto *LowerSplitB =
B.CreateShuffleVector(InputB, LowerSplitMask);
24375 auto *UpperSplitA =
B.CreateShuffleVector(InputA, UpperSplitMask);
24376 auto *UpperSplitB =
B.CreateShuffleVector(InputB, UpperSplitMask);
24377 Value *LowerSplitAcc =
nullptr;
24378 Value *UpperSplitAcc =
nullptr;
24381 LowerSplitAcc =
B.CreateShuffleVector(
Accumulator, LowerSplitMask);
24382 UpperSplitAcc =
B.CreateShuffleVector(
Accumulator, UpperSplitMask);
24386 I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
24388 I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
24391 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
24395 Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
24396 Intrinsic::aarch64_neon_vcmla_rot90,
24397 Intrinsic::aarch64_neon_vcmla_rot180,
24398 Intrinsic::aarch64_neon_vcmla_rot270};
24403 return B.CreateIntrinsic(IdMap[(
int)Rotation], Ty,
24410 IntId = Intrinsic::aarch64_neon_vcadd_rot90;
24412 IntId = Intrinsic::aarch64_neon_vcadd_rot270;
24417 return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});